Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_hash.py
8422 views
1
import polars as pl
2
from polars.testing import assert_frame_equal
3
4
5
def test_hash_struct() -> None:
    """Hash a struct column built from two integer columns and compare the
    result against pinned expected values."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    # Pack every column into one struct column; the assertion below reads it
    # back under the name "a".
    packed = frame.select(pl.struct(pl.all()))
    hashed = packed.select(pl.col("a").hash())

    expected = [
        5535262844797696299,
        15139341575481673729,
        12593759486533989774,
    ]
    assert hashed["a"].to_list() == expected
13
14
15
def test_hash_cat_stable() -> None:
    """Equal categorical data must hash equally under different Categories.

    Checked both for a plain categorical column and for a struct that has the
    categorical as one of its fields.
    """
    cats_a = pl.Categories.random()
    cats_b = pl.Categories.random()

    # Same strings, different insertion order per Categories. These series are
    # never read afterwards; they exist only for the act of constructing them
    # (presumably this registers the strings with each Categories -- confirm
    # before removing them as "unused").
    seeded_a = pl.Series(["cow", "cat", "moo"], dtype=pl.Categorical(cats_a))
    seeded_b = pl.Series(["cat", "moo", "cow"], dtype=pl.Categorical(cats_b))

    # Identical values in both frames; only the backing Categories differs.
    values = {"cat": ["cow", "cat", "moo"]}
    frame_a = pl.DataFrame(values, schema={"cat": pl.Categorical(cats_a)})
    frame_b = pl.DataFrame(values, schema={"cat": pl.Categorical(cats_b)})

    hash_cat = pl.col.cat.hash()
    assert_frame_equal(frame_a.select(hash_cat), frame_b.select(hash_cat))

    # The same must hold when the categorical is a field inside a struct.
    as_struct = pl.struct(c=pl.col.cat, x=1)
    hash_struct = pl.col.struct.hash()
    assert_frame_equal(
        frame_a.select(struct=as_struct).select(hash_struct),
        frame_b.select(struct=as_struct).select(hash_struct),
    )
42
43