Path: blob/main/py-polars/tests/unit/operations/test_hash.py
8422 views
import polars as pl
from polars.testing import assert_frame_equal


def test_hash_struct() -> None:
    """Hash a struct column and compare against pinned expected values."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    # Pack every column into a single struct column; it is addressed as "a"
    # in the hash expression below.
    df = df.select(pl.struct(pl.all()))
    # Regression values: one expected hash per input row.
    assert df.select(pl.col("a").hash())["a"].to_list() == [
        5535262844797696299,
        15139341575481673729,
        12593759486533989774,
    ]


def test_hash_cat_stable() -> None:
    """Check that equal categorical data hashes equally across two Categories."""
    c1 = pl.Categories.random()
    c2 = pl.Categories.random()

    # Different insertion order.
    # NOTE(review): s1/s2 are not read again; presumably building them is what
    # registers the strings with c1/c2 in differing orders - confirm before
    # removing them as "unused".
    s1 = pl.Series(["cow", "cat", "moo"], dtype=pl.Categorical(c1))
    s2 = pl.Series(["cat", "moo", "cow"], dtype=pl.Categorical(c2))

    # Same data should have same hash.
    df1 = pl.DataFrame(
        {"cat": ["cow", "cat", "moo"]}, schema={"cat": pl.Categorical(c1)}
    )
    df2 = pl.DataFrame(
        {"cat": ["cow", "cat", "moo"]}, schema={"cat": pl.Categorical(c2)}
    )
    assert_frame_equal(
        df1.select(pl.col.cat.hash()),
        df2.select(pl.col.cat.hash()),
    )

    # Also stable in struct?
    df1_struct = df1.select(struct=pl.struct(c=pl.col.cat, x=1))
    df2_struct = df2.select(struct=pl.struct(c=pl.col.cat, x=1))
    assert_frame_equal(
        df1_struct.select(pl.col.struct.hash()),
        df2_struct.select(pl.col.struct.hash()),
    )