# Path: blob/main/py-polars/tests/unit/operations/test_bitwise.py
# 8420 views
from __future__ import annotations

import typing
from typing import TYPE_CHECKING

import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal
from tests.unit.conftest import INTEGER_DTYPES

if TYPE_CHECKING:
    from tests.conftest import PlMonkeyPatch


@pytest.mark.parametrize("op", ["and_", "or_"])
def test_bitwise_integral_schema(op: str) -> None:
    """The lazy schema of `a <op> b` keeps the dtype of column `a`."""
    df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
    q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
    assert q.collect_schema()["a"] == df.collect_schema()["a"]


@pytest.mark.parametrize("op", ["and_", "or_", "xor"])
def test_bitwise_single_null_value_schema(op: str) -> None:
    """Combining a boolean column with a `None` operand yields one column `a`."""
    df = pl.DataFrame({"a": [True, True]})
    q = df.select(getattr(pl.col("a"), op)(None))
    result_schema = q.collect_schema()
    assert result_schema.len() == 1
    assert "a" in result_schema


def leading_zeros(v: int | None, nb: int) -> int | None:
    """
    Reference count of leading zero bits of `v` in an `nb`-bit word.

    Returns None when `v` is None. `v` is expected to be non-negative and to
    fit in `nb` bits (the caller masks it beforehand).
    """
    if v is None:
        return None

    b = bin(v)[2:]
    # `bin` never pads, so the only all-zero string it produces is "0".
    blen = len(b) - len(b.lstrip("0"))
    if blen == len(b):
        return nb
    else:
        return nb - len(b) + blen


def leading_ones(v: int | None, nb: int) -> int | None:
    """
    Reference count of leading one bits of `v` in an `nb`-bit word.

    Returns None when `v` is None.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    # Fewer digits than the word width means the top bit is zero.
    if len(b) < nb:
        return 0
    else:
        return len(b) - len(b.lstrip("1"))


def trailing_zeros(v: int | None, nb: int) -> int | None:
    """
    Reference count of trailing zero bits of `v` in an `nb`-bit word.

    Returns None when `v` is None and `nb` when `v` is zero.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    blen = len(b) - len(b.rstrip("0"))
    if blen == len(b):
        return nb
    else:
        return blen


def trailing_ones(v: int | None) -> int | None:
    """
    Reference count of trailing one bits of `v`.

    Returns None when `v` is None.
    """
    if v is None:
        return None

    b = bin(v)[2:]
    return len(b) - len(b.rstrip("1"))


@pytest.mark.parametrize(
    "value",
    [
        0x00,
        0x01,
        0xFCEF_0123,
        0xFFFF_FFFF,
        0xFFF0_FFE1_ABCD_EF01,
        0xAAAA_AAAA_AAAA_AAAA,
        None,
    ],
)
@pytest.mark.parametrize("dtype", [*INTEGER_DTYPES, pl.Boolean])
@typing.no_type_check
def test_bit_counts(value: int, dtype: pl.DataType) -> None:
    """Check the bit-counting Series methods against the Python reference helpers."""
    # Derive the bit width from the dtype's string form; a dtype whose name
    # carries none of the markers below keeps the default width of 8.
    dtype_name = str(dtype)
    bitsize = 8
    if "Boolean" in dtype_name:
        bitsize = 1
    elif "16" in dtype_name:
        bitsize = 16
    elif "32" in dtype_name:
        bitsize = 32
    elif "64" in dtype_name:
        bitsize = 64
    elif "128" in dtype_name:
        bitsize = 128

    if bitsize == 1 and value is not None:
        # Booleans: keep only the lowest bit, as a bool.
        value = value & 1 != 0

        co = 1 if value else 0
        cz = 0 if value else 1
    elif value is not None:
        # Truncate the test value to the width of the dtype.
        value = value & ((1 << bitsize) - 1)

        # For signed dtypes, clear the top bit so the value stays non-negative
        # (presumably so it is representable in the signed type).
        if dtype.is_signed_integer() and value >> (bitsize - 1) > 0:
            value = value - pow(2, bitsize - 1)

        co = value.bit_count()
        cz = bitsize - co
    else:
        co = None
        cz = None

    s = pl.Series("a", [value], dtype)

    assert_series_equal(
        s.bitwise_count_ones(),
        pl.Series("a", [co], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_count_zeros(),
        pl.Series("a", [cz], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_leading_ones(),
        pl.Series("a", [leading_ones(value, bitsize)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_leading_zeros(),
        pl.Series("a", [leading_zeros(value, bitsize)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_trailing_ones(),
        pl.Series("a", [trailing_ones(value)], pl.UInt32),
    )
    assert_series_equal(
        s.bitwise_trailing_zeros(),
        pl.Series("a", [trailing_zeros(value, bitsize)], pl.UInt32),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations(dtype: pl.DataType) -> None:
    """Eager AND/OR/XOR aggregations over a null-free integer column."""
    s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    df = s.to_frame().select(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    )

    assert_frame_equal(
        df,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_no_nulls(dtype: pl.DataType) -> None:
    """Lazy AND/OR/XOR aggregations over a null-free integer column."""
    s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)

    lf = s.to_frame().lazy()

    out = lf.select(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    ).collect()

    assert_frame_equal(
        out,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_aggregations_lazy_some_nulls(dtype: pl.DataType) -> None:
    """Lazy aggregations give the same result when nulls are interleaved."""
    s = pl.Series("a", [0x74, None, 0x1C, None, 0x05], dtype)
    out = (
        s.to_frame()
        .lazy()
        .select(
            AND=pl.col.a.bitwise_and(),
            OR=pl.col.a.bitwise_or(),
            XOR=pl.col.a.bitwise_xor(),
        )
        .collect()
    )

    assert_frame_equal(
        out,
        pl.DataFrame(
            [
                pl.Series("AND", [0x04], dtype),
                pl.Series("OR", [0x7D], dtype),
                pl.Series("XOR", [0x6D], dtype),
            ]
        ),
    )


@pytest.mark.parametrize(
    "expr",
    [pl.col("a").bitwise_and(), pl.col("a").bitwise_or(), pl.col("a").bitwise_xor()],
)
def test_bit_aggregations_lazy_all_nulls(expr: pl.Expr) -> None:
    """Aggregating an all-null column yields a single null of the same dtype."""
    dtype = pl.Int64
    s = pl.Series("a", [None, None, None], dtype)
    out = s.to_frame().lazy().select(OUT=expr).collect()

    assert_frame_equal(
        out,
        pl.DataFrame([pl.Series("OUT", [None], dtype)]),
    )


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_bit_group_by(dtype: pl.DataType) -> None:
    """Per-group aggregations; group 3 holds only a null, group 4 a trailing null."""
    df = pl.DataFrame(
        [
            pl.Series("g", [4, 1, 1, 2, 3, 2, 4, 4], pl.Int8),
            pl.Series("a", [0x03, 0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype),
        ]
    )

    df = df.group_by("g").agg(
        AND=pl.col.a.bitwise_and(),
        OR=pl.col.a.bitwise_or(),
        XOR=pl.col.a.bitwise_xor(),
    )

    assert_frame_equal(
        df,
        pl.DataFrame(
            [
                pl.Series("g", [1, 2, 3, 4], pl.Int8),
                pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, 0x01], dtype),
                pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, 0x03], dtype),
                pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, 0x02], dtype),
            ]
        ),
        check_row_order=False,
    )


def test_scalar_bitwise_xor() -> None:
    """XOR of 0x80 repeated 0..4 times: null for the empty case, then alternating."""
    df = pl.select(
        pl.repeat(pl.lit(0x80, pl.UInt8), i).bitwise_xor().alias(f"l{i}")
        for i in range(5)
    ).transpose()

    assert_series_equal(
        df.to_series(),
        pl.Series("x", [None, 0x80, 0x00, 0x80, 0x00], pl.UInt8),
        check_names=False,
    )


@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, True, False, None]),
        (pl.all().bitwise_or(), [True, True, False, True, False, None]),
        (pl.all().bitwise_xor(), [False, True, False, True, False, None]),
    ],
)
def test_bool_bitwise_with_nulls_23314(expr: pl.Expr, result: list[bool]) -> None:
    """Eager boolean aggregations over columns that mix values and nulls."""
    df = pl.DataFrame(
        {
            "a": [True, True, None],
            "b": [True, False, None],
            "c": [False, False, None],
            "d": [True, None, None],
            "e": [False, None, None],
            "f": [None, None, None],
        },
        schema_overrides={"f": pl.Boolean},
    )
    columns = ["a", "b", "c", "d", "e", "f"]
    out = df.select(expr)
    expected = pl.DataFrame(
        [result], orient="row", schema=columns, schema_overrides={"f": pl.Boolean}
    )
    assert_frame_equal(out, expected)


@pytest.mark.parametrize(
    ("expr", "result"),
    [
        (pl.all().bitwise_and(), [True, False, False, False, False, None]),
        (pl.all().bitwise_or(), [True, True, True, False, True, None]),
        (pl.all().bitwise_xor(), [True, False, True, False, True, None]),
    ],
)
def test_bitwise_boolean(expr: pl.Expr, result: list[bool]) -> None:
    """Lazy boolean aggregations, including a partly-null and an all-null column."""
    lf = pl.LazyFrame(
        {
            "a": [True, True, True],
            "b": [True, False, True],
            "c": [False, True, False],
            "d": [False, False, False],
            "x": [True, False, None],
            "z": [None, None, None],
        },
        schema_overrides={"z": pl.Boolean},
    )

    columns = ["a", "b", "c", "d", "x", "z"]
    expected = pl.DataFrame(
        [result], orient="row", schema=columns, schema_overrides={"z": pl.Boolean}
    )
    out = lf.select(expr).collect()
    assert_frame_equal(out, expected)


# Although there is no way to deterministically trigger the `evict` path
# in the code, the below test will do so with high likelihood
# POLARS_MAX_THREADS is only honored when tested in isolation, see issue #22070
def test_bitwise_boolean_evict_path(plmonkeypatch: PlMonkeyPatch) -> None:
    """Group-by boolean aggregations with a tiny hot table and a single thread."""
    plmonkeypatch.setenv("POLARS_MAX_THREADS", "1")
    plmonkeypatch.setenv("POLARS_HOT_TABLE_SIZE", "2")
    n_groups = 100
    group_size_pairs = 10
    group_size = group_size_pairs * 2

    # Key `a` cycles 0..group_size-1; `b` alternates True/False, so every even
    # key only ever sees True and every odd key only ever sees False.
    col_a = list(range(group_size)) * n_groups
    col_b = [True, False] * group_size_pairs * n_groups
    df = pl.DataFrame({"a": pl.Series(col_a), "b": pl.Series(col_b)}).sort("a")

    out = (
        df.lazy()
        .group_by("a")
        .agg(
            [
                pl.col("b").bitwise_and().alias("bitwise_and"),
                pl.col("b").bitwise_or().alias("bitwise_or"),
                pl.col("b").bitwise_xor().alias("bitwise_xor"),
            ]
        )
        .sort("a")
        .collect()
    )
    expected = pl.DataFrame(
        {
            "a": list(range(group_size)),
            "bitwise_and": [True, False] * group_size_pairs,
            "bitwise_or": [True, False] * group_size_pairs,
            # XOR of `n_groups` copies of True is True only for an odd count.
            "bitwise_xor": [n_groups % 2 == 1, False] * group_size_pairs,
        }
    )
    assert_frame_equal(out, expected)


def test_bitwise_in_group_by() -> None:
    """Aggregating under a single literal group matches a plain `select`."""
    df = pl.DataFrame(
        {
            "a": [
                111,
                222,
                111,
                222,
                333,
                333,
                999,
                888,
                999,
            ],
        }
    )

    assert_frame_equal(
        df.group_by(pl.lit(1))
        .agg(
            bwand=pl.col.a.bitwise_and(),
            bwor=pl.col.a.bitwise_or(),
            bwxor=pl.col.a.bitwise_xor(),
        )
        .drop("literal"),
        df.select(
            bwand=pl.col.a.bitwise_and(),
            bwor=pl.col.a.bitwise_or(),
            bwxor=pl.col.a.bitwise_xor(),
        ),
    )