Path: blob/main/py-polars/tests/unit/constructors/test_dataframe.py
8398 views
from __future__ import annotations

import enum
import sys
from collections import OrderedDict
from collections.abc import Mapping
from datetime import date, datetime, time
from typing import TYPE_CHECKING, Any

import pytest

import polars as pl
from polars.exceptions import DataOrientationWarning, InvalidOperationError

if TYPE_CHECKING:
    from collections.abc import Iterator

    from polars._typing import SchemaDict


def test_df_mixed_dtypes_string() -> None:
    """A str/int/float list is rejected when strict, else cast to List(String)."""
    data = {"x": [["abc", 12, 34.5]], "y": [1]}

    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {"x": pl.List(pl.String), "y": pl.Int64}
    assert df.rows() == [(["abc", "12", "34.5"], 1)]


def test_df_mixed_dtypes_object() -> None:
    """A bytes/int/float list is rejected when strict, else stored as Object."""
    data = {"x": [[b"abc", 12, 34.5]], "y": [1]}

    with pytest.raises(TypeError):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {"x": pl.Object, "y": pl.Int64}
    assert df.rows() == [([b"abc", 12, 34.5], 1)]


def test_df_object() -> None:
    """Arbitrary Python instances yield an object-dtype column and round-trip."""

    class Foo:
        def __init__(self, value: int) -> None:
            self._value = value

        def __eq__(self, other: object) -> bool:
            return issubclass(other.__class__, self.__class__) and (
                self._value == other._value  # type: ignore[attr-defined]
            )

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self._value})"

    df = pl.DataFrame({"a": [Foo(1), Foo(2)]})
    assert df["a"].dtype.is_object()
    assert df.rows() == [(Foo(1),), (Foo(2),)]


def test_df_init_from_generator_dict_view() -> None:
    """Dict views (keys/values/items) are accepted as column data."""
    d = {0: "x", 1: "y", 2: "z"}
    data = {
        "keys": d.keys(),
        "vals": d.values(),
        "items": d.items(),
    }
    # The (int, str) item tuples are mixed-type, so strict mode must refuse them.
    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {
        "keys": pl.Int64,
        "vals": pl.String,
        "items": pl.List(pl.String),
    }
    assert df.to_dict(as_series=False) == {
        "keys": [0, 1, 2],
        "vals": ["x", "y", "z"],
        "items": [["0", "x"], ["1", "y"], ["2", "z"]],
    }


@pytest.mark.skipif(
    sys.version_info < (3, 11),
    reason="reversed dict views not supported before Python 3.11",
)
def test_df_init_from_generator_reversed_dict_view() -> None:
    """Reversed dict views are accepted; items are kept as Object via override."""
    d = {0: "x", 1: "y", 2: "z"}
    data = {
        "rev_keys": reversed(d.keys()),
        "rev_vals": reversed(d.values()),
        "rev_items": reversed(d.items()),
    }
    df = pl.DataFrame(data, schema_overrides={"rev_items": pl.Object})

    assert df.schema == {
        "rev_keys": pl.Int64,
        "rev_vals": pl.String,
        "rev_items": pl.Object,
    }
    assert df.to_dict(as_series=False) == {
        "rev_keys": [2, 1, 0],
        "rev_vals": ["z", "y", "x"],
        "rev_items": [(2, "z"), (1, "y"), (0, "x")],
    }


def test_df_init_strict() -> None:
    """A float among ints fails an Int8 schema when strict, else is cast."""
    data = {"a": [1, 2, 3.0]}
    schema = {"a": pl.Int8}
    with pytest.raises(TypeError):
        pl.DataFrame(data, schema=schema, strict=True)

    df = pl.DataFrame(data, schema=schema, strict=False)
    assert df["a"].to_list() == [1, 2, 3]
    assert df["a"].dtype == pl.Int8


def test_df_init_from_series_strict() -> None:
    """A negative value fails a UInt8 schema when strict, else becomes null."""
    s = pl.Series("a", [-1, 0, 1])
    schema = {"a": pl.UInt8}
    with pytest.raises(InvalidOperationError):
        pl.DataFrame(s, schema=schema, strict=True)

    df = pl.DataFrame(s, schema=schema, strict=False)
    assert df["a"].to_list() == [None, 0, 1]
    assert df["a"].dtype == pl.UInt8


# https://github.com/pola-rs/polars/issues/15471
def test_df_init_rows_overrides_non_existing() -> None:
    """Overrides naming columns absent from the row data add no columns."""
    df = pl.DataFrame([{"a": 1}], schema_overrides={"a": pl.Int8(), "b": pl.Boolean()})
    assert df.schema == OrderedDict({"a": pl.Int8})

    df = pl.DataFrame(
        [{"a": 3, "b": 1.0}],
        schema_overrides={"a": pl.Int8, "c": pl.Utf8},
    )
    assert df.schema == OrderedDict({"a": pl.Int8, "b": pl.Float64})


# https://github.com/pola-rs/polars/issues/15245
def test_df_init_nested_mixed_types() -> None:
    """Int/float mix inside nested structs fails when strict, else is Float64."""
    data = [{"key": [{"value": 1}, {"value": 1.0}]}]

    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(data, strict=True)

    df = pl.DataFrame(data, strict=False)
    assert df.schema == {"key": pl.List(pl.Struct({"value": pl.Float64}))}
    assert df.to_dicts() == [{"key": [{"value": 1.0}, {"value": 1.0}]}]


class CustomSchema(Mapping[str, Any]):
    """Dummy schema object for testing compatibility with Mapping."""

    # Backing store; the annotation now matches the attribute that
    # ``__init__`` assigns and the accessors below read.
    _items: dict[str, Any]

    def __init__(self, **named_entries: Any) -> None:
        self._items = OrderedDict(named_entries.items())

    def __getitem__(self, key: str) -> Any:
        return self._items[key]

    def __len__(self) -> int:
        return len(self._items)

    def __iter__(self) -> Iterator[str]:
        yield from self._items


def test_custom_schema() -> None:
    """A non-dict Mapping works as a schema; non-dtype values raise TypeError."""
    df = pl.DataFrame(schema=CustomSchema(bool=pl.Boolean, misc=pl.UInt8))
    assert df.schema == OrderedDict([("bool", pl.Boolean), ("misc", pl.UInt8)])

    with pytest.raises(TypeError):
        pl.DataFrame(schema=CustomSchema(bool="boolean", misc="unsigned int"))


def test_list_null_constructor_schema() -> None:
    """An empty nested list and a bare ``pl.List`` schema both give List(Null)."""
    expected = pl.List(pl.Null)
    assert pl.DataFrame({"a": [[]]}).dtypes[0] == expected
    assert pl.DataFrame(schema={"a": pl.List}).dtypes[0] == expected


def test_df_init_schema_object() -> None:
    """A ``pl.Schema`` instance is accepted as the ``schema`` argument."""
    schema = pl.Schema({"a": pl.Int8(), "b": pl.String()})
    df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}, schema=schema)

    assert df.columns == schema.names()
    assert df.dtypes == schema.dtypes()


def test_df_init_data_orientation_inference_warning() -> None:
    """``from_records`` on this list-of-lists input warns about orientation."""
    with pytest.warns(DataOrientationWarning):
        pl.from_records([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"])


def test_df_init_enum_dtype() -> None:
    """A Python ``str`` enum class used as a schema dtype maps to ``pl.Enum``."""

    class PythonEnum(str, enum.Enum):
        A = "A"
        B = "B"
        C = "C"

    df = pl.DataFrame({"Col 1": ["A", "B", "C"]}, schema={"Col 1": PythonEnum})
    assert df.dtypes[0] == pl.Enum(["A", "B", "C"])


@pytest.mark.parametrize(
    "schema_param",
    [
        {
            "schema": {
                "date": pl.Date,
                "time": pl.Time,
                "datetime": pl.Datetime,
            },
        },
        {
            "schema_overrides": {
                "date": pl.Date(),
                "time": pl.Time(),
                "datetime": pl.Datetime(),
            },
        },
    ],
)
def test_temporal_string_schema_overrides(schema_param: dict[str, SchemaDict]) -> None:
    """Temporal strings are parsed given either ``schema`` or ``schema_overrides``."""
    df = pl.DataFrame(
        {
            "date": ["2024-01-01", "2025-10-07"],
            "time": ["12:00:00", "13:30:00"],
            "datetime": ["2024-01-01 23:59:59", "2024-01-02T13:30:00.123456"],
        },
        **schema_param,  # type: ignore[arg-type]
    )
    assert df.schema == {
        "date": pl.Date,
        "time": pl.Time,
        "datetime": pl.Datetime("us"),
    }
    assert df.to_dicts() == [
        {
            "date": date(2024, 1, 1),
            "time": time(12, 0),
            "datetime": datetime(2024, 1, 1, 23, 59, 59),
        },
        {
            "date": date(2025, 10, 7),
            "time": time(13, 30),
            "datetime": datetime(2024, 1, 2, 13, 30, 0, 123456),
        },
    ]