# Path: blob/main/py-polars/tests/unit/operations/test_bitwise.py
# 6939 views
from __future__ import annotations

import sys
import typing

import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal
from tests.unit.conftest import INTEGER_DTYPES


@pytest.mark.parametrize("op", ["and_", "or_"])
def test_bitwise_integral_schema(op: str) -> None:
    """Check that `a <op> b` on integer columns keeps the dtype of column `a`."""
    df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
    q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
    assert q.collect_schema()["a"] == df.collect_schema()["a"]


@pytest.mark.parametrize("op", ["and_", "or_", "xor"])
def test_bitwise_single_null_value_schema(op: str) -> None:
    """Check the schema of a Boolean column combined with a `None` operand.

    The resulting schema must contain exactly one column, named "a".
    """
    df = pl.DataFrame({"a": [True, True]})
    q = df.select(getattr(pl.col("a"), op)(None))
    result_schema = q.collect_schema()
    assert result_schema.len() == 1
    assert "a" in result_schema


def leading_zeros(v: int | None, nb: int) -> int | None:
    """Reference count of leading zero bits of `v` in an `nb`-bit word.

    Returns None when `v` is None. The callers in this file only pass
    non-negative values that fit in `nb` bits; other inputs are not handled.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    # `bin` never emits leading zeros except for the value 0 itself ("0"),
    # so `blen` is non-zero only when every digit is a zero.
    blen = len(b) - len(b.lstrip("0"))
    if blen == len(b):
        return nb
    else:
        return nb - len(b) + blen


def leading_ones(v: int | None, nb: int) -> int | None:
    """Reference count of leading one bits of `v` in an `nb`-bit word.

    Returns None when `v` is None.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    if len(b) < nb:
        # Fewer than `nb` digits means the top bit of the word is unset.
        return 0
    else:
        return len(b) - len(b.lstrip("1"))


def trailing_zeros(v: int | None, nb: int) -> int | None:
    """Reference count of trailing zero bits of `v` in an `nb`-bit word.

    Returns None when `v` is None and `nb` when every digit of `v` is zero.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    blen = len(b) - len(b.rstrip("0"))
    if blen == len(b):
        return nb
    else:
        return blen


def trailing_ones(v: int | None) -> int | None:
    """Reference count of trailing one bits of `v`; None when `v` is None."""
    if v is None:
        return None

    b = bin(v)[2:]
    return len(b) - len(b.rstrip("1"))


@pytest.mark.parametrize(
    "value",
    [
        0x00,
        0x01,
        0xFCEF_0123,
        0xFFFF_FFFF,
        0xFFF0_FFE1_ABCD_EF01,
        0xAAAA_AAAA_AAAA_AAAA,
        None,
    ],
)
@pytest.mark.parametrize("dtype", [*INTEGER_DTYPES, pl.Boolean])
@pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10")
@typing.no_type_check
def test_bit_counts(value: int, dtype: pl.DataType) -> None:
    """Compare the bit-counting Series methods against pure-Python references.

    The parametrized `value` is first reduced so that it fits into `dtype`;
    the expected results are then computed with `int.bit_count` and the
    helper functions defined in this module.
    """
    # Derive the bit width from the dtype's string form; 8 is the fallback
    # for dtypes whose name contains none of the markers below.
    dtype_name = str(dtype)
    bitsize = 8
    if "Boolean" in dtype_name:
        bitsize = 1
    elif "16" in dtype_name:
        bitsize = 16
    elif "32" in dtype_name:
        bitsize = 32
    elif "64" in dtype_name:
        bitsize = 64
    elif "128" in dtype_name:
        bitsize = 128

    if bitsize == 1 and value is not None:
        # Boolean: keep only the lowest bit, as a bool.
        value = value & 1 != 0

        co = 1 if value else 0
        cz = 0 if value else 1
    elif value is not None:
        # Truncate to the dtype's width.
        value = value & ((1 << bitsize) - 1)

        if dtype.is_signed_integer() and value >> (bitsize - 1) > 0:
            # The top bit is set: clear it so the value stays below
            # 2 ** (bitsize - 1).
            value = value - pow(2, bitsize - 1)

        co = value.bit_count()
        cz = bitsize - co
    else:
        co = None
        cz = None

    s = pl.Series("a", [value], dtype)

    assert_series_equal(
        s.bitwise_count_ones(),
        pl.Series("a", [co], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_count_zeros(),
        pl.Series("a", [cz], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_leading_ones(),
        pl.Series("a", [leading_ones(value, bitsize)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_leading_zeros(),
        pl.Series("a", [leading_zeros(value, bitsize)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_trailing_ones(),
        pl.Series("a", [trailing_ones(value)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_trailing_zeros(),
        pl.Series("a", [trailing_zeros(value, bitsize)], pl.UInt32),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations(dtype: pl.DataType) -> None:
    """Check the AND/OR/XOR reductions of a null-free column (eager)."""
    s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    df = s.to_frame().select(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    )

    assert_frame_equal(
        df,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_no_nulls(dtype: pl.DataType) -> None:
    """Check the AND/OR/XOR reductions of a null-free column (lazy)."""
    s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    lf = s.to_frame().lazy()

    out = lf.select(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    ).collect()

    assert_frame_equal(
        out,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_some_nulls(dtype: pl.DataType) -> None:
    """Check that interleaved nulls do not change the AND/OR/XOR reductions.

    The expected values are the same as for the null-free input.
    """
    s = pl.Series("a", [0x74, None, 0x1C, None, 0x05], dtype)
    out = (
        s.to_frame()
        .lazy()
        .select(
            AND=pl.col.a.bitwise_and(),
            OR=pl.col.a.bitwise_or(),
            XOR=pl.col.a.bitwise_xor(),
        )
        .collect()
    )

    assert_frame_equal(
        out,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )


@pytest.mark.parametrize(
    "expr",
    [pl.col("a").bitwise_and(), pl.col("a").bitwise_or(), pl.col("a").bitwise_xor()],
)
def test_bit_aggregations_lazy_all_nulls(expr: pl.Expr) -> None:
    """Check that reducing an all-null Int64 column yields a single null."""
    dtype = pl.Int64
    s = pl.Series("a", [None, None, None], dtype)
    out = s.to_frame().lazy().select(OUT=expr).collect()

    assert_frame_equal(
        out,
        pl.DataFrame([pl.Series("OUT", [None], dtype)]),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_group_by(dtype: pl.DataType) -> None:
    """Check the AND/OR/XOR reductions per group, including null handling.

    Group 3 contains only a null and is expected to aggregate to null;
    group 4 contains one null next to two values.
    """
    df = pl.DataFrame(
        [
            pl.Series("g", [4, 1, 1, 2, 3, 2, 4, 4], pl.Int8),
            pl.Series("a", [0x03, 0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype),
        ]
    )

    df = df.group_by("g").agg(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    )

    assert_frame_equal(
        df,
        pl.DataFrame(
            [
                pl.Series("g", [1, 2, 3, 4], pl.Int8),
                pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, 0x01], dtype),
                pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, 0x03], dtype),
                pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, 0x02], dtype),
            ]
        ),
        # The comparison ignores the order in which groups come back.
        check_row_order=False,
    )


def test_scalar_bitwise_xor() -> None:
    """Check XOR over `0x80` repeated 0 to 4 times.

    Zero repetitions are expected to give null; after that the result
    alternates between 0x80 and 0x00.
    """
    df = pl.select(
        pl.repeat(pl.lit(0x80, pl.UInt8), i).bitwise_xor().alias(f"l{i}")
        for i in range(5)
    ).transpose()

    assert_series_equal(
        df.to_series(),
        pl.Series("x", [None, 0x80, 0x00, 0x80, 0x00], pl.UInt8),
        check_names=False,
    )


@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, True, False, None]),
        (pl.all().bitwise_or(), [True, True, False, True, False, None]),
        (pl.all().bitwise_xor(), [False, True, False, True, False, None]),
    ],
)
def test_bool_bitwise_with_nulls_23314(expr: pl.Expr, result: list[bool]) -> None:
    """Check Boolean AND/OR/XOR reductions on columns that contain nulls.

    `result` holds the expected value for columns "a" to "f", in that order.
    """
    df = pl.DataFrame(
        {
            "a": [True, True, None],
            "b": [True, False, None],
            "c": [False, False, None],
            "d": [True, None, None],
            "e": [False, None, None],
            "f": [None, None, None],
        },
        schema_overrides={"f": pl.Boolean},
    )
    columns = ["a", "b", "c", "d", "e", "f"]
    out = df.select(expr)
    expected = pl.DataFrame(
        [result], orient="row", schema=columns, schema_overrides={"f": pl.Boolean}
    )
    assert_frame_equal(out, expected)


@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, False, False, None]),
        (pl.all().bitwise_or(), [True, True, True, False, True, None]),
        (pl.all().bitwise_xor(), [True, False, True, False, True, None]),
    ],
)
def test_bitwise_boolean(expr: pl.Expr, result: list[bool]) -> None:
    """Check Boolean AND/OR/XOR reductions through the lazy engine.

    `result` holds the expected value for columns "a", "b", "c", "d", "x"
    and "z", in that order.
    """
    lf = pl.LazyFrame(
        {
            "a": [True, True, True],
            "b": [True, False, True],
            "c": [False, True, False],
            "d": [False, False, False],
            "x": [True, False, None],
            "z": [None, None, None],
        },
        schema_overrides={"z": pl.Boolean},
    )

    columns = ["a", "b", "c", "d", "x", "z"]
    expected = pl.DataFrame(
        [result], orient="row", schema=columns, schema_overrides={"z": pl.Boolean}
    )
    out = lf.select(expr).collect()
    assert_frame_equal(out, expected)


# Although there is no way to deterministically trigger the `evict` path
# in the code, the below test will do so with high likelihood
# POLARS_MAX_THREADS is only honored when tested in isolation, see issue #22070
def test_bitwise_boolean_evict_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check grouped Boolean AND/OR/XOR over many small, repeated groups.

    Every even key in "a" is always paired with True and every odd key with
    False, and each key occurs `n_groups` times.
    """
    monkeypatch.setenv("POLARS_MAX_THREADS", "1")
    monkeypatch.setenv("POLARS_HOT_TABLE_SIZE", "2")
    n_groups = 100
    group_size_pairs = 10
    group_size = group_size_pairs * 2

    col_a = list(range(group_size)) * n_groups
    col_b = [True, False] * group_size_pairs * n_groups
    df = pl.DataFrame({"a": pl.Series(col_a), "b": pl.Series(col_b)}).sort("a")

    out = (
        df.lazy()
        .group_by("a")
        .agg(
            [
                pl.col("b").bitwise_and().alias("bitwise_and"),
                pl.col("b").bitwise_or().alias("bitwise_or"),
                pl.col("b").bitwise_xor().alias("bitwise_xor"),
            ]
        )
        .sort("a")
        .collect()
    )
    expected = pl.DataFrame(
        {
            "a": list(range(group_size)),
            "bitwise_and": [True, False] * group_size_pairs,
            "bitwise_or": [True, False] * group_size_pairs,
            # XOR of `n_groups` True values is True only for an odd count.
            "bitwise_xor": [n_groups % 2 == 1, False] * group_size_pairs,
        }
    )
    assert_frame_equal(out, expected)