Path: blob/main/py-polars/tests/unit/constructors/test_dataframe.py
6939 views
from __future__ import annotations

import enum
import sys
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any

import pytest

import polars as pl
from polars.exceptions import DataOrientationWarning, InvalidOperationError

if TYPE_CHECKING:
    from collections.abc import Iterator


def test_df_mixed_dtypes_string() -> None:
    """Check a nested list mixing str/int/float under strict and non-strict init.

    Strict construction must raise ``TypeError``; non-strict construction is
    expected to yield a ``List(String)`` column with stringified values.
    """
    data = {"x": [["abc", 12, 34.5]], "y": [1]}

    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {"x": pl.List(pl.String), "y": pl.Int64}
    assert df.rows() == [(["abc", "12", "34.5"], 1)]


def test_df_mixed_dtypes_object() -> None:
    """Check a nested list mixing bytes/int/float under strict and non-strict init.

    Strict construction must raise ``TypeError``; non-strict construction is
    expected to yield an ``Object`` column holding the original Python list.
    """
    data = {"x": [[b"abc", 12, 34.5]], "y": [1]}

    with pytest.raises(TypeError):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {"x": pl.Object, "y": pl.Int64}
    assert df.rows() == [([b"abc", 12, 34.5], 1)]


def test_df_object() -> None:
    """Check that arbitrary Python objects land in an object-dtype column."""

    class Foo:
        """Minimal value wrapper with equality, used to compare round-tripped rows."""

        def __init__(self, value: int) -> None:
            self._value = value

        def __eq__(self, other: object) -> bool:
            return issubclass(other.__class__, self.__class__) and (
                self._value == other._value  # type: ignore[attr-defined]
            )

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self._value})"

    df = pl.DataFrame({"a": [Foo(1), Foo(2)]})
    assert df["a"].dtype.is_object()
    assert df.rows() == [(Foo(1),), (Foo(2),)]


def test_df_init_from_generator_dict_view() -> None:
    """Check construction from dict views (keys / values / items).

    The ``items`` view holds mixed ``(int, str)`` pairs, so strict construction
    must raise ``TypeError``; non-strict construction is expected to produce a
    ``List(String)`` column for it.
    """
    d = {0: "x", 1: "y", 2: "z"}
    data = {
        "keys": d.keys(),
        "vals": d.values(),
        "items": d.items(),
    }
    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {
        "keys": pl.Int64,
        "vals": pl.String,
        "items": pl.List(pl.String),
    }
    assert df.to_dict(as_series=False) == {
        "keys": [0, 1, 2],
        "vals": ["x", "y", "z"],
        "items": [["0", "x"], ["1", "y"], ["2", "z"]],
    }


@pytest.mark.skipif(
    sys.version_info < (3, 11),
    reason="reversed dict views not supported before Python 3.11",
)
def test_df_init_from_generator_reversed_dict_view() -> None:
    """Check construction from reversed dict views, with ``items`` forced to Object."""
    d = {0: "x", 1: "y", 2: "z"}
    data = {
        "rev_keys": reversed(d.keys()),
        "rev_vals": reversed(d.values()),
        "rev_items": reversed(d.items()),
    }
    df = pl.DataFrame(data, schema_overrides={"rev_items": pl.Object})

    assert df.schema == {
        "rev_keys": pl.Int64,
        "rev_vals": pl.String,
        "rev_items": pl.Object,
    }
    assert df.to_dict(as_series=False) == {
        "rev_keys": [2, 1, 0],
        "rev_vals": ["z", "y", "x"],
        "rev_items": [(2, "z"), (1, "y"), (0, "x")],
    }


def test_df_init_strict() -> None:
    """Check that a float among ints with an ``Int8`` schema fails only when strict."""
    data = {"a": [1, 2, 3.0]}
    schema = {"a": pl.Int8}
    with pytest.raises(TypeError):
        pl.DataFrame(data, schema=schema, strict=True)

    df = pl.DataFrame(data, schema=schema, strict=False)

    assert df["a"].to_list() == [1, 2, 3]
    assert df["a"].dtype == pl.Int8


def test_df_init_from_series_strict() -> None:
    """Check casting a Series with a negative value to ``UInt8`` via the schema.

    Strict construction must raise ``InvalidOperationError``; non-strict
    construction is expected to replace the unrepresentable value with null.
    """
    s = pl.Series("a", [-1, 0, 1])
    schema = {"a": pl.UInt8}
    with pytest.raises(InvalidOperationError):
        pl.DataFrame(s, schema=schema, strict=True)

    df = pl.DataFrame(s, schema=schema, strict=False)

    assert df["a"].to_list() == [None, 0, 1]
    assert df["a"].dtype == pl.UInt8


# https://github.com/pola-rs/polars/issues/15471
def test_df_init_rows_overrides_non_existing() -> None:
    """Check that ``schema_overrides`` entries for absent columns are ignored."""
    df = pl.DataFrame([{"a": 1}], schema_overrides={"a": pl.Int8(), "b": pl.Boolean()})
    assert df.schema == OrderedDict({"a": pl.Int8})

    df = pl.DataFrame(
        [{"a": 3, "b": 1.0}],
        schema_overrides={"a": pl.Int8, "c": pl.Utf8},
    )
    assert df.schema == OrderedDict({"a": pl.Int8, "b": pl.Float64})


# https://github.com/pola-rs/polars/issues/15245
def test_df_init_nested_mixed_types() -> None:
    """Check list-of-struct data whose struct field mixes int and float.

    Strict construction must raise ``TypeError``; non-strict construction is
    expected to unify the field to ``Float64``.
    """
    data = [{"key": [{"value": 1}, {"value": 1.0}]}]

    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)

    assert df.schema == {"key": pl.List(pl.Struct({"value": pl.Float64}))}
    assert df.to_dicts() == [{"key": [{"value": 1.0}, {"value": 1.0}]}]


class CustomSchema(Mapping[str, Any]):
    """Dummy schema object for testing compatibility with Mapping."""

    # Annotation matches the attribute actually assigned in ``__init__`` and
    # read by the Mapping methods below (previously declared as ``_entries``,
    # a name that was never set).
    _items: OrderedDict[str, Any]

    def __init__(self, **named_entries: Any) -> None:
        self._items = OrderedDict(named_entries.items())

    def __getitem__(self, key: str) -> Any:
        return self._items[key]

    def __len__(self) -> int:
        return len(self._items)

    def __iter__(self) -> Iterator[str]:
        yield from self._items


def test_custom_schema() -> None:
    """Check that a custom ``Mapping`` is accepted as schema and bad dtypes are rejected."""
    df = pl.DataFrame(schema=CustomSchema(bool=pl.Boolean, misc=pl.UInt8))
    assert df.schema == OrderedDict([("bool", pl.Boolean), ("misc", pl.UInt8)])

    with pytest.raises(TypeError):
        pl.DataFrame(schema=CustomSchema(bool="boolean", misc="unsigned int"))


def test_list_null_constructor_schema() -> None:
    """Check that an empty nested list and a bare ``pl.List`` both give ``List(Null)``."""
    expected = pl.List(pl.Null)
    assert pl.DataFrame({"a": [[]]}).dtypes[0] == expected
    assert pl.DataFrame(schema={"a": pl.List}).dtypes[0] == expected


def test_df_init_schema_object() -> None:
    """Check that a ``pl.Schema`` instance can be passed directly as ``schema``."""
    schema = pl.Schema({"a": pl.Int8(), "b": pl.String()})
    df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}, schema=schema)

    assert df.columns == schema.names()
    assert df.dtypes == schema.dtypes()


def test_df_init_data_orientation_inference_warning() -> None:
    """Check that ``from_records`` warns when it has to infer the data orientation."""
    with pytest.warns(DataOrientationWarning):
        pl.from_records([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"])


def test_df_init_enum_dtype() -> None:
    """Check that a Python ``str`` enum class used as a dtype maps to ``pl.Enum``."""

    class PythonEnum(str, enum.Enum):
        A = "A"
        B = "B"
        C = "C"

    df = pl.DataFrame({"Col 1": ["A", "B", "C"]}, schema={"Col 1": PythonEnum})
    assert df.dtypes[0] == pl.Enum(["A", "B", "C"])