Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/streaming/test_streaming_categoricals.py
6939 views
1
import pytest
2
3
import polars as pl
4
5
# Module-level mark: tags every test in this file with xdist_group("streaming").
# With pytest-xdist's `--dist loadgroup`, tests sharing a group name are sent
# to the same worker.
pytestmark = pytest.mark.xdist_group("streaming")
6
7
8
def test_streaming_nested_categorical() -> None:
    """Streaming group-by `first()` over a List(Categorical) column."""
    frame = pl.LazyFrame({"numbers": [1, 1, 2], "cat": [["str"], ["foo"], ["bar"]]})
    query = (
        frame.with_columns(pl.col("cat").cast(pl.List(pl.Categorical)))
        .group_by("numbers")
        .agg(pl.col("cat").first())
        .sort("numbers")
    )

    # Key 1 occurs twice; the expected output keeps its first list, ["str"].
    out = query.collect(engine="streaming").to_dict(as_series=False)
    assert out == {
        "numbers": [1, 2],
        "cat": [["str"], ["bar"]],
    }
19
20
21
def test_streaming_cat_14933() -> None:
    """Streaming left join whose right side has an all-null Categorical column.

    See https://github.com/pola-rs/polars/issues/14933
    """
    left = pl.LazyFrame({"a": pl.Series([0], dtype=pl.UInt32)})

    key_column = pl.Series("a", [0, 1], dtype=pl.UInt32)
    null_categories = pl.Series("l", [None, None], dtype=pl.Categorical())
    right = pl.LazyFrame([key_column, null_categories])

    joined = left.join(right, on="a", how="left")
    out = joined.collect(engine="streaming").to_dict(as_series=False)
    assert out == {"a": [0], "l": [None]}
34
35