Book a Demo!
CoCalc Logo Icon
Store Features Docs Share Support News About Policies Sign Up Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_dataframe.py
6939 views
1
from __future__ import annotations
2
3
import enum
4
import sys
5
from collections import OrderedDict
6
from collections.abc import Mapping
7
from typing import TYPE_CHECKING, Any
8
9
import pytest
10
11
import polars as pl
12
from polars.exceptions import DataOrientationWarning, InvalidOperationError
13
14
if TYPE_CHECKING:
15
from collections.abc import Iterator
16
17
18
def test_df_mixed_dtypes_string() -> None:
    """Check a list mixing str/int/float under strict and non-strict modes.

    Strict construction must raise; non-strict construction must yield a
    ``List(String)`` column holding the stringified elements.
    """
    mixed = {"x": [["abc", 12, 34.5]], "y": [1]}

    # Strict mode rejects the heterogeneous inner list.
    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(mixed, strict=True)

    # Non-strict mode falls back to strings for every inner element.
    lenient = pl.DataFrame(mixed, strict=False)
    expected_schema = {"x": pl.List(pl.String), "y": pl.Int64}
    expected_rows = [(["abc", "12", "34.5"], 1)]
    assert lenient.schema == expected_schema
    assert lenient.rows() == expected_rows
27
28
29
def test_df_mixed_dtypes_object() -> None:
    """Check a list mixing bytes/int/float under strict and non-strict modes.

    Strict construction must raise; non-strict construction must keep the
    original Python values in an ``Object`` column.
    """
    mixed = {"x": [[b"abc", 12, 34.5]], "y": [1]}

    # Strict mode rejects the heterogeneous inner list.
    with pytest.raises(TypeError):
        pl.DataFrame(mixed, strict=True)

    # Non-strict mode keeps the inner list untouched as a Python object.
    lenient = pl.DataFrame(mixed, strict=False)
    expected_schema = {"x": pl.Object, "y": pl.Int64}
    expected_rows = [([b"abc", 12, 34.5], 1)]
    assert lenient.schema == expected_schema
    assert lenient.rows() == expected_rows
38
39
40
def test_df_object() -> None:
    """Check that custom class instances land in an object-dtype column."""

    class Foo:
        """Small wrapper around an int that compares by wrapped value."""

        def __init__(self, value: int) -> None:
            self._value = value

        def __eq__(self, other: object) -> bool:
            # Only instances of this class (or a subclass) can compare equal.
            if not issubclass(other.__class__, self.__class__):
                return False
            return self._value == other._value  # type: ignore[attr-defined]

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self._value})"

    frame = pl.DataFrame({"a": [Foo(1), Foo(2)]})
    column_dtype = frame["a"].dtype
    assert column_dtype.is_object()

    # Rows must compare equal to freshly built instances (value equality).
    expected_rows = [(Foo(1),), (Foo(2),)]
    assert frame.rows() == expected_rows
56
57
58
def test_df_init_from_generator_dict_view() -> None:
    """Check DataFrame construction from dict key/value/item views.

    The ``items`` view yields mixed ``(int, str)`` pairs, so strict mode must
    raise while non-strict mode produces a ``List(String)`` column.
    """
    source = {0: "x", 1: "y", 2: "z"}
    views = {
        "keys": source.keys(),
        "vals": source.values(),
        "items": source.items(),
    }

    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(views, strict=True)

    frame = pl.DataFrame(views, strict=False)

    expected_schema = {
        "keys": pl.Int64,
        "vals": pl.String,
        "items": pl.List(pl.String),
    }
    assert frame.schema == expected_schema

    expected_columns = {
        "keys": [0, 1, 2],
        "vals": ["x", "y", "z"],
        "items": [["0", "x"], ["1", "y"], ["2", "z"]],
    }
    assert frame.to_dict(as_series=False) == expected_columns
79
80
81
@pytest.mark.skipif(
    sys.version_info < (3, 11),
    reason="reversed dict views not supported before Python 3.11",
)
def test_df_init_from_generator_reversed_dict_view() -> None:
    """Check DataFrame construction from reversed dict views.

    The reversed item pairs are kept as Python tuples by overriding that
    column's dtype to ``Object``.
    """
    source = {0: "x", 1: "y", 2: "z"}
    reversed_views = {
        "rev_keys": reversed(source.keys()),
        "rev_vals": reversed(source.values()),
        "rev_items": reversed(source.items()),
    }
    overrides = {"rev_items": pl.Object}
    frame = pl.DataFrame(reversed_views, schema_overrides=overrides)

    expected_schema = {
        "rev_keys": pl.Int64,
        "rev_vals": pl.String,
        "rev_items": pl.Object,
    }
    assert frame.schema == expected_schema

    expected_columns = {
        "rev_keys": [2, 1, 0],
        "rev_vals": ["z", "y", "x"],
        "rev_items": [(2, "z"), (1, "y"), (0, "x")],
    }
    assert frame.to_dict(as_series=False) == expected_columns
104
105
106
def test_df_init_strict() -> None:
    """Check ``strict`` handling when a float appears in an ``Int8`` column."""
    values = {"a": [1, 2, 3.0]}
    int8_schema = {"a": pl.Int8}

    # Strict mode refuses the float entry for an integer column.
    with pytest.raises(TypeError):
        pl.DataFrame(values, schema=int8_schema, strict=True)

    # Non-strict mode casts the float to the requested integer dtype.
    frame = pl.DataFrame(values, schema=int8_schema, strict=False)
    column = frame["a"]

    assert column.to_list() == [1, 2, 3]
    assert frame["a"].dtype == pl.Int8
116
117
118
def test_df_init_from_series_strict() -> None:
    """Check ``strict`` handling when a Series with a negative goes to ``UInt8``."""
    series = pl.Series("a", [-1, 0, 1])
    uint8_schema = {"a": pl.UInt8}

    # Strict mode raises because -1 cannot be represented as UInt8.
    with pytest.raises(InvalidOperationError):
        pl.DataFrame(series, schema=uint8_schema, strict=True)

    # Non-strict mode replaces the unrepresentable value with null.
    frame = pl.DataFrame(series, schema=uint8_schema, strict=False)
    column = frame["a"]

    assert column.to_list() == [None, 0, 1]
    assert frame["a"].dtype == pl.UInt8
128
129
130
# https://github.com/pola-rs/polars/issues/15471
131
def test_df_init_rows_overrides_non_existing() -> None:
    """Check that overrides naming absent columns do not add columns."""
    # Override instances: "b" is not present in the row data.
    single_column = pl.DataFrame(
        [{"a": 1}],
        schema_overrides={"a": pl.Int8(), "b": pl.Boolean()},
    )
    assert single_column.schema == OrderedDict({"a": pl.Int8})

    # Override classes: "c" is not present, "b" keeps its inferred dtype.
    rows = [{"a": 3, "b": 1.0}]
    overrides = {"a": pl.Int8, "c": pl.Utf8}
    two_columns = pl.DataFrame(rows, schema_overrides=overrides)
    assert two_columns.schema == OrderedDict({"a": pl.Int8, "b": pl.Float64})
140
141
142
# https://github.com/pola-rs/polars/issues/15245
143
def test_df_init_nested_mixed_types() -> None:
    """Check int/float mixing inside a list of structs for both strict modes."""
    records = [{"key": [{"value": 1}, {"value": 1.0}]}]

    # Strict mode rejects the int/float mix in the nested "value" field.
    with pytest.raises(TypeError, match="unexpected value"):
        pl.DataFrame(records, strict=True)

    # Non-strict mode upcasts the nested field to Float64.
    frame = pl.DataFrame(records, strict=False)

    expected_schema = {"key": pl.List(pl.Struct({"value": pl.Float64}))}
    expected_records = [{"key": [{"value": 1.0}, {"value": 1.0}]}]
    assert frame.schema == expected_schema
    assert frame.to_dicts() == expected_records
153
154
155
class CustomSchema(Mapping[str, Any]):
    """
    Dummy schema object for testing compatibility with Mapping.

    Entries are supplied as keyword arguments and exposed read-only through
    the ``Mapping`` protocol, in the order they were passed.
    """

    # Backing store for the named entries. The annotation now names the
    # attribute that ``__init__`` actually assigns (it previously declared
    # ``_entries``, which was never set or read).
    _items: OrderedDict[str, Any]

    def __init__(self, **named_entries: Any) -> None:
        """Store the given ``name=value`` pairs in insertion order."""
        self._items = OrderedDict(named_entries)

    def __getitem__(self, key: str) -> Any:
        """Return the entry stored under ``key``; raise ``KeyError`` if absent."""
        return self._items[key]

    def __len__(self) -> int:
        """Return the number of stored entries."""
        return len(self._items)

    def __iter__(self) -> Iterator[str]:
        """Iterate over the entry names in insertion order."""
        return iter(self._items)
171
172
173
def test_custom_schema() -> None:
    """Check that a custom ``Mapping`` is accepted as a DataFrame schema."""
    # Valid dtypes: the resulting schema mirrors the mapping's entries.
    valid_schema = CustomSchema(bool=pl.Boolean, misc=pl.UInt8)
    frame = pl.DataFrame(schema=valid_schema)
    expected = OrderedDict([("bool", pl.Boolean), ("misc", pl.UInt8)])
    assert frame.schema == expected

    # Plain strings are not valid dtypes and must be rejected.
    with pytest.raises(TypeError):
        pl.DataFrame(schema=CustomSchema(bool="boolean", misc="unsigned int"))
179
180
181
def test_list_null_constructor_schema() -> None:
    """Check that an untyped list column resolves to ``List(Null)``."""
    null_list = pl.List(pl.Null)

    # Inferred from data: a single empty list.
    from_empty_list = pl.DataFrame({"a": [[]]})
    assert from_empty_list.dtypes[0] == null_list

    # Declared in the schema: bare ``pl.List`` without an inner dtype.
    from_bare_list = pl.DataFrame(schema={"a": pl.List})
    assert from_bare_list.dtypes[0] == null_list
185
186
187
def test_df_init_schema_object() -> None:
    """Check that a ``pl.Schema`` instance can be passed as ``schema``."""
    schema = pl.Schema({"a": pl.Int8(), "b": pl.String()})
    columns = {"a": [1, 2, 3], "b": ["x", "y", "z"]}
    frame = pl.DataFrame(columns, schema=schema)

    # Names and dtypes of the frame must match those of the Schema object.
    assert frame.columns == schema.names()
    assert frame.dtypes == schema.dtypes()
193
194
195
def test_df_init_data_orientation_inference_warning() -> None:
    """Check that ``from_records`` warns about orientation for nested lists."""
    records = [[1, 2, 3], [4, 5, 6]]
    column_names = ["a", "b", "c"]
    with pytest.warns(DataOrientationWarning):
        pl.from_records(records, schema=column_names)
198
199
200
def test_df_init_enum_dtype() -> None:
    """Check that a Python string enum class works as a column dtype."""

    class PythonEnum(str, enum.Enum):
        """String-valued enum whose members become the Enum categories."""

        A = "A"
        B = "B"
        C = "C"

    column = "Col 1"
    frame = pl.DataFrame({column: ["A", "B", "C"]}, schema={column: PythonEnum})

    # The column dtype is a polars Enum over the Python enum's values.
    expected_dtype = pl.Enum(["A", "B", "C"])
    assert frame.dtypes[0] == expected_dtype
208
209