# Path: blob/main/py-polars/tests/unit/dataframe/test_rows.py
# 8375 views
from datetime import date

import pytest

import polars as pl
from polars.exceptions import NoRowsReturnedError, TooManyRowsReturnedError
from tests.unit.conftest import INTEGER_DTYPES


def test_row_tuple() -> None:
    """Check `DataFrame.row` lookups by index and by predicate, plus error cases."""
    df = pl.DataFrame({"a": ["foo", "bar", "2"], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]})

    # return row by index
    assert df.row(0) == ("foo", 1, 1.0)
    assert df.row(1) == ("bar", 2, 2.0)
    assert df.row(-1) == ("2", 3, 3.0)

    # return named row by index
    row = df.row(0, named=True)
    assert row == {"a": "foo", "b": 1, "c": 1.0}

    # return row by predicate
    assert df.row(by_predicate=pl.col("a") == "bar") == ("bar", 2, 2.0)
    assert df.row(by_predicate=pl.col("b").is_in([2, 4, 6])) == ("bar", 2, 2.0)

    # return named row by predicate
    row = df.row(by_predicate=pl.col("a") == "bar", named=True)
    assert row == {"a": "bar", "b": 2, "c": 2.0}

    # expected error conditions
    with pytest.raises(TooManyRowsReturnedError):
        df.row(by_predicate=pl.col("b").is_in([1, 3, 5]))

    with pytest.raises(NoRowsReturnedError):
        df.row(by_predicate=pl.col("a") == "???")

    # cannot set both 'index' and 'by_predicate'
    with pytest.raises(ValueError):
        df.row(0, by_predicate=pl.col("a") == "bar")

    # must call 'by_predicate' by keyword
    with pytest.raises(TypeError):
        df.row(None, pl.col("a") == "bar")  # type: ignore[call-overload]

    # cannot pass predicate into 'index'
    with pytest.raises(TypeError):
        df.row(pl.col("a") == "bar")  # type: ignore[call-overload]

    # at least one of 'index' and 'by_predicate' must be set
    with pytest.raises(ValueError):
        df.row()


def test_rows() -> None:
    """Check `DataFrame.rows` for tuple and named output, incl. a Null-typed column."""
    df = pl.DataFrame({"a": [1, 2], "b": [1, 2]})

    # Regular rows
    assert df.rows() == [(1, 1), (2, 2)]
    assert df.reverse().rows() == [(2, 2), (1, 1)]

    # Named rows
    rows = df.rows(named=True)
    assert rows == [{"a": 1, "b": 1}, {"a": 2, "b": 2}]

    # Rows with nullarray cols
    df = df.with_columns(c=pl.lit(None))
    assert df.schema == {"a": pl.Int64, "b": pl.Int64, "c": pl.Null}
    assert df.rows() == [(1, 1, None), (2, 2, None)]
    assert df.rows(named=True) == [
        {"a": 1, "b": 1, "c": None},
        {"a": 2, "b": 2, "c": None},
    ]


def test_rows_by_key() -> None:
    """Check `DataFrame.rows_by_key` with single/compound keys and each option."""
    df = pl.DataFrame(
        {
            "w": ["a", "b", "b", "a"],
            "x": ["q", "q", "q", "k"],
            "y": [1.0, 2.5, 3.0, 4.5],
            "z": [9, 8, 7, 6],
        }
    )

    # tuple (unnamed) rows
    assert df.rows_by_key("w") == {
        "a": [("q", 1.0, 9), ("k", 4.5, 6)],
        "b": [("q", 2.5, 8), ("q", 3.0, 7)],
    }
    assert df.rows_by_key("w", unique=True) == {
        "a": ("k", 4.5, 6),
        "b": ("q", 3.0, 7),
    }
    assert df.rows_by_key("w", include_key=True) == {
        "a": [("a", "q", 1.0, 9), ("a", "k", 4.5, 6)],
        "b": [("b", "q", 2.5, 8), ("b", "q", 3.0, 7)],
    }
    assert df.rows_by_key("w", include_key=True) == {
        key[0]: grp.rows() for key, grp in df.group_by(["w"])
    }
    assert df.rows_by_key("w", include_key=True, unique=True) == {
        "a": ("a", "k", 4.5, 6),
        "b": ("b", "q", 3.0, 7),
    }
    assert df.rows_by_key(["x", "w"]) == {
        ("q", "a"): [(1.0, 9)],
        ("q", "b"): [(2.5, 8), (3.0, 7)],
        ("k", "a"): [(4.5, 6)],
    }
    assert df.rows_by_key(["w", "x"], include_key=True) == {
        ("a", "q"): [("a", "q", 1.0, 9)],
        ("a", "k"): [("a", "k", 4.5, 6)],
        ("b", "q"): [("b", "q", 2.5, 8), ("b", "q", 3.0, 7)],
    }
    assert df.rows_by_key(["w", "x"], include_key=True, unique=True) == {
        ("a", "q"): ("a", "q", 1.0, 9),
        ("b", "q"): ("b", "q", 3.0, 7),
        ("a", "k"): ("a", "k", 4.5, 6),
    }

    # dict (named) rows
    assert df.rows_by_key("w", named=True) == {
        "a": [{"x": "q", "y": 1.0, "z": 9}, {"x": "k", "y": 4.5, "z": 6}],
        "b": [{"x": "q", "y": 2.5, "z": 8}, {"x": "q", "y": 3.0, "z": 7}],
    }
    assert df.rows_by_key("w", named=True, unique=True) == {
        "a": {"x": "k", "y": 4.5, "z": 6},
        "b": {"x": "q", "y": 3.0, "z": 7},
    }
    assert df.rows_by_key("w", named=True, include_key=True) == {
        "a": [
            {"w": "a", "x": "q", "y": 1.0, "z": 9},
            {"w": "a", "x": "k", "y": 4.5, "z": 6},
        ],
        "b": [
            {"w": "b", "x": "q", "y": 2.5, "z": 8},
            {"w": "b", "x": "q", "y": 3.0, "z": 7},
        ],
    }
    assert df.rows_by_key("w", named=True, include_key=True) == {
        key[0]: grp.rows(named=True) for key, grp in df.group_by(["w"])
    }
    assert df.rows_by_key("w", named=True, include_key=True, unique=True) == {
        "a": {"w": "a", "x": "k", "y": 4.5, "z": 6},
        "b": {"w": "b", "x": "q", "y": 3.0, "z": 7},
    }
    assert df.rows_by_key(["x", "w"], named=True) == {
        ("q", "a"): [{"y": 1.0, "z": 9}],
        ("q", "b"): [{"y": 2.5, "z": 8}, {"y": 3.0, "z": 7}],
        ("k", "a"): [{"y": 4.5, "z": 6}],
    }
    assert df.rows_by_key(["w", "x"], named=True, include_key=True) == {
        ("a", "q"): [{"w": "a", "x": "q", "y": 1.0, "z": 9}],
        ("a", "k"): [{"w": "a", "x": "k", "y": 4.5, "z": 6}],
        ("b", "q"): [
            {"w": "b", "x": "q", "y": 2.5, "z": 8},
            {"w": "b", "x": "q", "y": 3.0, "z": 7},
        ],
    }
    assert df.rows_by_key(["w", "x"], named=True, include_key=True, unique=True) == {
        ("a", "q"): {"w": "a", "x": "q", "y": 1.0, "z": 9},
        ("b", "q"): {"w": "b", "x": "q", "y": 3.0, "z": 7},
        ("a", "k"): {"w": "a", "x": "k", "y": 4.5, "z": 6},
    }


def test_iter_rows() -> None:
    """Check `DataFrame.iter_rows` with default/explicit buffer sizes and named rows.

    Also checks `to_dicts` on a frame that is kept as two chunks.
    """
    df = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [True, False, None],
        }
    ).with_columns(pl.Series(["a:b", "c:d", "e:f"]).str.split_exact(":", 1).alias("c"))

    # expected struct values
    c1 = {"field_0": "a", "field_1": "b"}
    c2 = {"field_0": "c", "field_1": "d"}
    c3 = {"field_0": "e", "field_1": "f"}

    # Default iter_rows behaviour
    it = df.iter_rows()
    assert next(it) == (1, True, c1)
    assert next(it) == (2, False, c2)
    assert next(it) == (3, None, c3)
    with pytest.raises(StopIteration):
        next(it)

    # Apply explicit row-buffer size
    for sz in (0, 1, 2, 3, 4):
        it = df.iter_rows(buffer_size=sz)
        assert next(it) == (1, True, c1)
        assert next(it) == (2, False, c2)
        assert next(it) == (3, None, c3)
        with pytest.raises(StopIteration):
            next(it)

        # Return named rows
        it_named = df.iter_rows(named=True, buffer_size=sz)
        row = next(it_named)
        assert row == {"a": 1, "b": True, "c": c1}
        row = next(it_named)
        assert row == {"a": 2, "b": False, "c": c2}
        row = next(it_named)
        assert row == {"a": 3, "b": None, "c": c3}

        with pytest.raises(StopIteration):
            next(it_named)

    # test over chunked frame
    df = pl.concat(
        [
            pl.DataFrame({"id": [0, 1], "values": ["a", "b"]}),
            pl.DataFrame({"id": [2, 3], "values": ["c", "d"]}),
        ],
        rechunk=False,
    )
    assert df.n_chunks() == 2
    assert df.to_dicts() == [
        {"id": 0, "values": "a"},
        {"id": 1, "values": "b"},
        {"id": 2, "values": "c"},
        {"id": 3, "values": "d"},
    ]


@pytest.mark.parametrize("primitive", INTEGER_DTYPES)
def test_row_constructor_schema(primitive: pl.DataType) -> None:
    """Check that row-oriented construction applies the requested integer dtype."""
    result = pl.DataFrame(data=[[1], [2], [3]], schema={"d": primitive}, orient="row")

    assert result.dtypes == [primitive]
    assert result.to_dict(as_series=False) == {"d": [1, 2, 3]}


def test_row_constructor_uint64() -> None:
    """Check row-oriented construction with a UInt64 value above the Int64 maximum."""
    # validate init with a valid UInt64 that exceeds Int64 upper bound
    df = pl.DataFrame(data=[[0], [(2**63) + 1]], schema={"x": pl.UInt64}, orient="row")
    assert df.rows() == [(0,), (9223372036854775809,)]


def test_physical_row_encoding() -> None:
    """Check `group_by(...).all()` output when the key column holds lists of dates."""
    dt_str = [
        {
            "ts": date(2023, 7, 1),
            "files": "AGG_202307.xlsx",
            "period_bins": [date(2023, 7, 1), date(2024, 1, 1)],
        },
    ]

    df = pl.from_dicts(dt_str)
    df_groups = df.group_by("period_bins")
    assert df_groups.all().to_dicts() == [
        {
            "period_bins": [date(2023, 7, 1), date(2024, 1, 1)],
            "ts": [date(2023, 7, 1)],
            "files": ["AGG_202307.xlsx"],
        }
    ]


def test_row_with_no_arguments() -> None:
    """Check bare `.row()`: allowed on a one-row frame, `ValueError` otherwise."""
    # confirm that calling bare `.row()` on a single-row frame behaves
    # consistently with calling `item()` on a single element frame
    df = pl.DataFrame({"tag": ["xx"], "n": [1]})
    assert df.row() == ("xx", 1)

    # however, cannot call bare '.row()' if the frame does NOT have a single row
    df = pl.DataFrame({"tag": ["xx", "yy"], "n": [1, 2]})
    with pytest.raises(
        ValueError,
        match=r'can only call `\.row\(\)` without "index" or "by_predicate"',
    ):
        df.row()