CoCalc -- test_bitwise.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_bitwise.py
6939 views
1
from __future__ import annotations
2

3
import sys
4
import typing
5

6
import pytest
7

8
import polars as pl
9
from polars.testing import assert_frame_equal, assert_series_equal
10
from tests.unit.conftest import INTEGER_DTYPES
11

12

13
@pytest.mark.parametrize("op", ["and_", "or_"])
def test_bitwise_integral_schema(op: str) -> None:
    """The lazily resolved dtype of `a <op> b` equals the dtype of column `a`."""
    lf = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})

    # Look the operator method up by name so one body covers every parametrization.
    combine = getattr(pl.col("a"), op)
    result_schema = lf.select(combine(pl.col("b"))).collect_schema()

    assert result_schema["a"] == lf.collect_schema()["a"]
18

19

20
@pytest.mark.parametrize("op", ["and_", "or_", "xor"])
def test_bitwise_single_null_value_schema(op: str) -> None:
    """Combining a Boolean column with `None` yields a schema holding only `a`."""
    frame = pl.DataFrame({"a": [True, True]})

    bitwise_op = getattr(pl.col("a"), op)
    schema = frame.select(bitwise_op(None)).collect_schema()

    assert schema.len() == 1
    assert "a" in schema
27

28

29
def leading_zeros(v: int | None, nb: int) -> int | None:
    """
    Count the leading zero bits of `v` viewed as an `nb`-bit integer.

    `v` is first reduced to its low `nb` bits, so negative inputs are read as
    their two's complement bit pattern and values wider than `nb` bits are
    truncated instead of producing a negative count.

    Returns `None` when `v` is `None`, and `nb` when no bit is set.
    """
    if v is None:
        return None

    # Masking also turns a `bool` into a plain `int`.
    pattern = v & ((1 << nb) - 1)

    # `bit_length()` is the position of the highest set bit (0 for zero), so
    # everything above it within the `nb`-bit window is a leading zero.
    return nb - pattern.bit_length()
39

40

41
def leading_ones(v: int | None, nb: int) -> int | None:
    """
    Count the leading one bits of `v` viewed as an `nb`-bit integer.

    `v` is first reduced to its low `nb` bits, so negative inputs are read as
    their two's complement bit pattern.

    Returns `None` when `v` is `None`, and `nb` when every bit is set.
    """
    if v is None:
        return None

    mask = (1 << nb) - 1

    # Flip the low `nb` bits; the leading ones of `v` become leading zeros.
    # XOR against the mask is used rather than `~` so that `bool` inputs do not
    # go through the deprecated `~bool` operation.
    flipped = (v & mask) ^ mask

    return nb - flipped.bit_length()
50

51

52
def trailing_zeros(v: int | None, nb: int) -> int | None:
    """
    Count the trailing zero bits of `v` viewed as an `nb`-bit integer.

    `v` is first reduced to its low `nb` bits, so the result never exceeds
    `nb` and negative inputs are read as their two's complement bit pattern.

    Returns `None` when `v` is `None`, and `nb` when no bit is set.
    """
    if v is None:
        return None

    pattern = v & ((1 << nb) - 1)
    if pattern == 0:
        return nb

    # `pattern & -pattern` keeps only the lowest set bit; its bit length minus
    # one is the number of zeros below it.
    lowest_set = pattern & -pattern
    return lowest_set.bit_length() - 1
62

63

64
def trailing_ones(v: int | None, nb: int | None = None) -> int | None:
    """
    Count the trailing one bits of `v`.

    When `nb` is given, `v` is first reduced to its low `nb` bits, so negative
    inputs are read as their `nb`-bit two's complement pattern. Without `nb`,
    negative inputs are treated as sign-extended two's complement values.

    Returns `None` when `v` is `None`.

    Raises `ValueError` for `v == -1` without `nb`, because every bit of that
    value is set and the count has no upper bound.
    """
    if v is None:
        return None

    if nb is not None:
        v &= (1 << nb) - 1
    elif v == -1:
        msg = "cannot count the trailing ones of -1 without a bit width `nb`"
        raise ValueError(msg)

    # Adding one flips the run of trailing ones plus the zero above it, so the
    # XOR has exactly (trailing ones + 1) low bits set.
    return (v ^ (v + 1)).bit_length() - 1
70

71

72
@pytest.mark.parametrize(
    "value",
    [
        0x00,
        0x01,
        0xFCEF_0123,
        0xFFFF_FFFF,
        0xFFF0_FFE1_ABCD_EF01,
        0xAAAA_AAAA_AAAA_AAAA,
        None,
    ],
)
@pytest.mark.parametrize("dtype", [*INTEGER_DTYPES, pl.Boolean])
@pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10")
@typing.no_type_check
def test_bit_counts(value: int, dtype: pl.DataType) -> None:
    """
    Check the six bit-counting Series methods against pure-Python expectations.

    The parametrized value is first narrowed to the width of `dtype`. For
    signed dtypes a set top bit is cleared, so only non-negative values are
    placed in the Series.
    """
    # Derive the bit width from the dtype's string form; anything that is
    # neither Boolean nor one of the listed widths is treated as 8 bits wide.
    dtype_name = str(dtype)
    if "Boolean" in dtype_name:
        bitsize = 1
    elif "16" in dtype_name:
        bitsize = 16
    elif "32" in dtype_name:
        bitsize = 32
    elif "64" in dtype_name:
        bitsize = 64
    elif "128" in dtype_name:
        bitsize = 128
    else:
        bitsize = 8

    if value is None:
        ones = None
        zeros = None
    elif bitsize == 1:
        # Booleans carry a single bit: keep only the lowest bit of the value.
        value = (value & 1) != 0
        ones = int(value)
        zeros = 1 - ones
    else:
        value = value & ((1 << bitsize) - 1)

        top_bit_set = value >> (bitsize - 1) > 0
        if dtype.is_signed_integer() and top_bit_set:
            value = value - pow(2, bitsize - 1)

        ones = value.bit_count()
        zeros = bitsize - ones

    expectations = [
        ("bitwise_count_ones", ones),
        ("bitwise_count_zeros", zeros),
        ("bitwise_leading_ones", leading_ones(value, bitsize)),
        ("bitwise_leading_zeros", leading_zeros(value, bitsize)),
        ("bitwise_trailing_ones", trailing_ones(value)),
        ("bitwise_trailing_zeros", trailing_zeros(value, bitsize)),
    ]
    for method_name, expected in expectations:
        series = pl.Series("a", [value], dtype)
        assert_series_equal(
            getattr(series, method_name)(),
            pl.Series("a", [expected], pl.UInt32),
        )
141

142

143
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations(dtype: pl.DataType) -> None:
    """Eager AND/OR/XOR reductions of a three-element integer column."""
    values = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    reductions = {
        "AND": pl.col.a.bitwise_and(),
        "OR": pl.col.a.bitwise_or(),
        "XOR": pl.col.a.bitwise_xor(),
    }
    result = values.to_frame().select(**reductions)

    expected = pl.DataFrame(
        [
            pl.Series("AND", [0x04], dtype),
            pl.Series("OR", [0x7D], dtype),
            pl.Series("XOR", [0x6D], dtype),
        ]
    )
    assert_frame_equal(result, expected)
163

164

165
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_no_nulls(dtype: pl.DataType) -> None:
    """Lazy AND/OR/XOR reductions of an integer column without nulls."""
    values = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    reductions = {
        "AND": pl.col.a.bitwise_and(),
        "OR": pl.col.a.bitwise_or(),
        "XOR": pl.col.a.bitwise_xor(),
    }
    result = values.to_frame().lazy().select(**reductions).collect()

    expected = pl.DataFrame(
        [
            pl.Series("AND", [0x04], dtype),
            pl.Series("OR", [0x7D], dtype),
            pl.Series("XOR", [0x6D], dtype),
        ]
    )
    assert_frame_equal(result, expected)
187

188

189
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_some_nulls(dtype: pl.DataType) -> None:
    """Lazy AND/OR/XOR reductions where nulls are interleaved with the values."""
    values = pl.Series("a", [0x74, None, 0x1C, None, 0x05], dtype)

    reductions = {
        "AND": pl.col.a.bitwise_and(),
        "OR": pl.col.a.bitwise_or(),
        "XOR": pl.col.a.bitwise_xor(),
    }
    result = values.to_frame().lazy().select(**reductions).collect()

    # The expected results are those of the three non-null values alone.
    expected = pl.DataFrame(
        [
            pl.Series("AND", [0x04], dtype),
            pl.Series("OR", [0x7D], dtype),
            pl.Series("XOR", [0x6D], dtype),
        ]
    )
    assert_frame_equal(result, expected)
213

214

215
@pytest.mark.parametrize(
    "expr",
    [pl.col("a").bitwise_and(), pl.col("a").bitwise_or(), pl.col("a").bitwise_xor()],
)
def test_bit_aggregations_lazy_all_nulls(expr: pl.Expr) -> None:
    """Each lazy bitwise reduction of an all-null Int64 column gives one null."""
    dtype = pl.Int64
    all_null = pl.Series("a", [None, None, None], dtype)

    result = all_null.to_frame().lazy().select(OUT=expr).collect()

    expected = pl.DataFrame([pl.Series("OUT", [None], dtype)])
    assert_frame_equal(result, expected)
228

229

230
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_group_by(dtype: pl.DataType) -> None:
    """Per-group AND/OR/XOR, including a group holding only a null."""
    groups = pl.Series("g", [4, 1, 1, 2, 3, 2, 4, 4], pl.Int8)
    values = pl.Series(
        "a", [0x03, 0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype
    )

    result = (
        pl.DataFrame([groups, values])
        .group_by("g")
        .agg(
            AND=pl.col.a.bitwise_and(),
            OR=pl.col.a.bitwise_or(),
            XOR=pl.col.a.bitwise_xor(),
        )
    )

    # Group 3 contains only a null; group 4 contains 0x03, 0x01 and a null.
    expected = pl.DataFrame(
        [
            pl.Series("g", [1, 2, 3, 4], pl.Int8),
            pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, 0x01], dtype),
            pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, 0x03], dtype),
            pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, 0x02], dtype),
        ]
    )

    # The comparison ignores row order.
    assert_frame_equal(result, expected, check_row_order=False)
257

258

259
def test_scalar_bitwise_xor() -> None:
    """XOR-reduce 0 to 4 repetitions of the UInt8 literal 0x80."""
    xor_of_repeats = (
        pl.repeat(pl.lit(0x80, pl.UInt8), n).bitwise_xor().alias(f"l{n}")
        for n in range(5)
    )
    transposed = pl.select(xor_of_repeats).transpose()

    # Zero repetitions give null; after that the result alternates with parity.
    expected = pl.Series("x", [None, 0x80, 0x00, 0x80, 0x00], pl.UInt8)
    assert_series_equal(transposed.to_series(), expected, check_names=False)
270

271

272
@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, True, False, None]),
        (pl.all().bitwise_or(), [True, True, False, True, False, None]),
        (pl.all().bitwise_xor(), [False, True, False, True, False, None]),
    ],
)
def test_bool_bitwise_with_nulls_23314(expr: pl.Expr, result: list[bool]) -> None:
    """Boolean bitwise reductions over columns that each contain nulls."""
    data = {
        "a": [True, True, None],
        "b": [True, False, None],
        "c": [False, False, None],
        "d": [True, None, None],
        "e": [False, None, None],
        "f": [None, None, None],
    }
    # Column "f" holds only nulls, so its Boolean dtype is set explicitly.
    frame = pl.DataFrame(data, schema_overrides={"f": pl.Boolean})

    actual = frame.select(expr)

    expected = pl.DataFrame(
        [result],
        orient="row",
        schema=list(data),
        schema_overrides={"f": pl.Boolean},
    )
    assert_frame_equal(actual, expected)
298

299

300
@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, False, False, None]),
        (pl.all().bitwise_or(), [True, True, True, False, True, None]),
        (pl.all().bitwise_xor(), [True, False, True, False, True, None]),
    ],
)
def test_bitwise_boolean(expr: pl.Expr, result: list[bool]) -> None:
    """Lazy Boolean bitwise reductions over every column of a small frame."""
    data = {
        "a": [True, True, True],
        "b": [True, False, True],
        "c": [False, True, False],
        "d": [False, False, False],
        "x": [True, False, None],
        "z": [None, None, None],
    }
    # Column "z" holds only nulls, so its Boolean dtype is set explicitly.
    lf = pl.LazyFrame(data, schema_overrides={"z": pl.Boolean})

    expected = pl.DataFrame(
        [result],
        orient="row",
        schema=list(data),
        schema_overrides={"z": pl.Boolean},
    )
    actual = lf.select(expr).collect()
    assert_frame_equal(actual, expected)
327

328

329
# There is no way to deterministically trigger the `evict` path in the code,
# but the test below will do so with high likelihood.
# POLARS_MAX_THREADS is only honored when tested in isolation, see issue #22070
def test_bitwise_boolean_evict_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Grouped Boolean AND/OR/XOR over 20 keys, each repeated 100 times."""
    monkeypatch.setenv("POLARS_MAX_THREADS", "1")
    monkeypatch.setenv("POLARS_HOT_TABLE_SIZE", "2")

    rows_per_key = 100
    n_key_pairs = 10
    n_keys = 2 * n_key_pairs

    # Keys cycle 0..n_keys-1; even keys always carry True, odd keys False.
    keys = list(range(n_keys)) * rows_per_key
    flags = [True, False] * n_key_pairs * rows_per_key
    frame = pl.DataFrame({"a": pl.Series(keys), "b": pl.Series(flags)}).sort("a")

    aggregations = [
        pl.col("b").bitwise_and().alias("bitwise_and"),
        pl.col("b").bitwise_or().alias("bitwise_or"),
        pl.col("b").bitwise_xor().alias("bitwise_xor"),
    ]
    out = frame.lazy().group_by("a").agg(aggregations).sort("a").collect()

    # XOR of `rows_per_key` identical True values is True only for an odd count.
    expected = pl.DataFrame(
        {
            "a": list(range(n_keys)),
            "bitwise_and": [True, False] * n_key_pairs,
            "bitwise_or": [True, False] * n_key_pairs,
            "bitwise_xor": [rows_per_key % 2 == 1, False] * n_key_pairs,
        }
    )
    assert_frame_equal(out, expected)
365

366
Product

Resources

Company