Path: blob/main/py-polars/tests/unit/expr/test_binary.py
7884 views
"""Unit tests for the ``bin.slice``, ``bin.head`` and ``bin.tail`` expressions."""

from __future__ import annotations

import polars as pl
from polars.testing import assert_frame_equal


def test_binary_slice_basic() -> None:
    """Test basic binary slicing with positive offset and length."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd\xfc\xfb",
                b"\x10\x20\x30",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.slice(1, 3).alias("sliced"))
    expected = pl.DataFrame(
        {
            "sliced": [
                b"\x01\x02\x03",
                b"\xfe\xfd\xfc",
                b"\x20\x30",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_slice_negative_offset() -> None:
    """Test binary slicing with negative offset."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd\xfc\xfb",
                b"\x10\x20\x30",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.slice(-3, 2).alias("sliced"))
    expected = pl.DataFrame(
        {
            "sliced": [
                b"\x02\x03",
                b"\xfd\xfc",
                b"\x10\x20",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_slice_to_end() -> None:
    """Test binary slicing to end (no length specified)."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd\xfc\xfb",
                b"\x10\x20\x30",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.slice(2).alias("sliced"))
    expected = pl.DataFrame(
        {
            "sliced": [
                b"\x02\x03\x04",
                b"\xfd\xfc\xfb",
                b"\x30",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_slice_with_expression() -> None:
    """Test binary slicing with offset as expression."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd\xfc\xfb",
                b"\x10\x20\x30",
                None,
            ],
            "offset": [0, 1, 2, 0],
        }
    )

    result = df.select(pl.col("data").bin.slice(pl.col("offset"), 2).alias("sliced"))
    expected = pl.DataFrame(
        {
            "sliced": [
                b"\x00\x01",
                b"\xfe\xfd",
                b"\x30",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_slice_zero_length() -> None:
    """Test binary slicing with zero length."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04"]})

    result = df.select(pl.col("data").bin.slice(1, 0).alias("sliced"))
    expected = pl.DataFrame({"sliced": [b""]})
    assert_frame_equal(result, expected)


def test_binary_slice_out_of_bounds() -> None:
    """Test binary slicing with out of bounds indices."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02"]})

    # Offset beyond length
    result = df.select(pl.col("data").bin.slice(10, 2).alias("sliced"))
    expected = pl.DataFrame({"sliced": [b""]})
    assert_frame_equal(result, expected)

    # Length beyond available data
    result = df.select(pl.col("data").bin.slice(1, 100).alias("sliced"))
    expected = pl.DataFrame({"sliced": [b"\x01\x02"]})
    assert_frame_equal(result, expected)


def test_binary_head_basic() -> None:
    """Test basic binary head with positive n."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.head(3).alias("head"))
    expected = pl.DataFrame(
        {
            "head": [
                b"\x00\x01\x02",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_head_larger_than_data() -> None:
    """Test binary head with n larger than data length."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.head(10).alias("head"))
    expected = pl.DataFrame(
        {
            "head": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_head_negative() -> None:
    """Test binary head with negative n (all but last n)."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.head(-2).alias("head"))
    expected = pl.DataFrame(
        {
            "head": [
                b"\x00\x01\x02",
                b"\xff",
                b"",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_head_zero() -> None:
    """Test binary head with n=0."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.head(0).alias("head"))
    expected = pl.DataFrame({"head": [b"", b"", b"", None]})
    assert_frame_equal(result, expected)


def test_binary_head_with_expression() -> None:
    """Test binary head with n as expression."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ],
            "n": [2, 1, 1, 0],
        }
    )

    result = df.select(pl.col("data").bin.head(pl.col("n")).alias("head"))
    expected = pl.DataFrame(
        {
            "head": [
                b"\x00\x01",
                b"\xff",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_head_default() -> None:
    """Test binary head with default n=5."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    result = df.select(pl.col("data").bin.head().alias("head"))
    expected = pl.DataFrame({"head": [b"\x00\x01\x02\x03\x04"]})
    assert_frame_equal(result, expected)


def test_binary_tail_basic() -> None:
    """Test basic binary tail with positive n."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.tail(3).alias("tail"))
    expected = pl.DataFrame(
        {
            "tail": [
                b"\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_tail_larger_than_data() -> None:
    """Test binary tail with n larger than data length."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.tail(10).alias("tail"))
    expected = pl.DataFrame(
        {
            "tail": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_tail_negative() -> None:
    """Test binary tail with negative n (all but first n)."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.tail(-2).alias("tail"))
    expected = pl.DataFrame(
        {
            "tail": [
                b"\x02\x03\x04",
                b"\xfd",
                b"",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_tail_zero() -> None:
    """Test binary tail with n=0."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )

    result = df.select(pl.col("data").bin.tail(0).alias("tail"))
    expected = pl.DataFrame({"tail": [b"", b"", b"", None]})
    assert_frame_equal(result, expected)


def test_binary_tail_with_expression() -> None:
    """Test binary tail with n as expression."""
    df = pl.DataFrame(
        {
            "data": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ],
            "n": [2, 1, 1, 0],
        }
    )

    result = df.select(pl.col("data").bin.tail(pl.col("n")).alias("tail"))
    expected = pl.DataFrame(
        {
            "tail": [
                b"\x03\x04",
                b"\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(result, expected)


def test_binary_tail_default() -> None:
    """Test binary tail with default n=5."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    result = df.select(pl.col("data").bin.tail().alias("tail"))
    expected = pl.DataFrame({"tail": [b"\x05\x06\x07\x08\x09"]})
    assert_frame_equal(result, expected)


def test_binary_head_then_tail() -> None:
    """Test chaining head and tail operations."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    result = df.select(pl.col("data").bin.head(8).bin.tail(6).alias("middle"))
    expected = pl.DataFrame({"middle": [b"\x02\x03\x04\x05\x06\x07"]})
    assert_frame_equal(result, expected)


def test_binary_slice_then_head() -> None:
    """Test chaining slice and head operations."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    result = df.select(pl.col("data").bin.slice(2, 6).bin.head(3).alias("combo"))
    expected = pl.DataFrame({"combo": [b"\x02\x03\x04"]})
    assert_frame_equal(result, expected)


def test_binary_tail_then_slice() -> None:
    """Test chaining tail and slice operations."""
    df = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    result = df.select(pl.col("data").bin.tail(7).bin.slice(1, 4).alias("combo"))
    expected = pl.DataFrame({"combo": [b"\x04\x05\x06\x07"]})
    assert_frame_equal(result, expected)


def test_binary_empty() -> None:
    """Test operations on empty binary data."""
    df = pl.DataFrame({"data": [b""]})

    assert_frame_equal(
        df.select(pl.col("data").bin.slice(0, 5)), pl.DataFrame({"data": [b""]})
    )
    assert_frame_equal(
        df.select(pl.col("data").bin.head(5)), pl.DataFrame({"data": [b""]})
    )
    assert_frame_equal(
        df.select(pl.col("data").bin.tail(5)), pl.DataFrame({"data": [b""]})
    )


def test_binary_all_nulls() -> None:
    """Test operations on all-null column."""
    df = pl.DataFrame({"data": [None, None, None]}, schema={"data": pl.Binary})

    assert_frame_equal(df.select(pl.col("data").bin.slice(0, 2)), df)
    assert_frame_equal(df.select(pl.col("data").bin.head(2)), df)
    assert_frame_equal(df.select(pl.col("data").bin.tail(2)), df)


def test_binary_single_byte() -> None:
    """Test operations on single-byte binary data."""
    df = pl.DataFrame({"data": [b"\xff"]})

    assert_frame_equal(df.select(pl.col("data").bin.slice(0, 1)), df)
    assert_frame_equal(df.select(pl.col("data").bin.head(1)), df)
    assert_frame_equal(df.select(pl.col("data").bin.tail(1)), df)
    assert_frame_equal(
        df.select(pl.col("data").bin.slice(0, 0)), pl.DataFrame({"data": [b""]})
    )