Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_constructors.py
8326 views
1
from __future__ import annotations
2
3
from collections import OrderedDict, namedtuple
4
from datetime import date, datetime, time, timedelta, timezone
5
from decimal import Decimal
6
from random import shuffle
7
from typing import TYPE_CHECKING, Any, Literal, NamedTuple
8
from zoneinfo import ZoneInfo
9
10
import numpy as np
11
import pandas as pd
12
import pyarrow as pa
13
import pytest
14
from packaging.version import parse as parse_version
15
from pydantic import BaseModel, Field, TypeAdapter
16
17
import polars as pl
18
import polars.selectors as cs
19
from polars._dependencies import dataclasses, pydantic
20
from polars._utils.construction.utils import try_get_type_hints
21
from polars.datatypes import numpy_char_code_to_dtype
22
from polars.exceptions import DuplicateError, ShapeError
23
from polars.testing import assert_frame_equal, assert_series_equal
24
from tests.unit.utils.pycapsule_utils import PyCapsuleArrayHolder, PyCapsuleStreamHolder
25
26
# Imports needed only for static type-checking; the guard keeps them off the
# runtime import path (no import-time cost, no circular-import risk).
if TYPE_CHECKING:
    import sys
    from collections.abc import Callable

    from polars._typing import PolarsDataType

    # `Self` moved into the stdlib `typing` module in Python 3.11
    if sys.version_info >= (3, 11):
        from typing import Self
    else:
        from typing_extensions import Self

    from typing_extensions import assert_type
38
39
40
# -----------------------------------------------------------------------------------
41
# nested dataclasses, models, namedtuple classes (can't be defined inside test func)
42
# -----------------------------------------------------------------------------------
43
@dataclasses.dataclass
class _TestBazDC:
    # innermost level of the nested dataclass fixture used by
    # test_init_structured_objects_nested
    d: datetime
    e: float
    f: str
48
49
50
@dataclasses.dataclass
class _TestBarDC:
    # middle level of the nested dataclass fixture; embeds _TestBazDC
    a: str
    b: int
    c: _TestBazDC
55
56
57
@dataclasses.dataclass
class _TestFooDC:
    # outermost level of the nested dataclass fixture; embeds _TestBarDC
    x: int
    y: _TestBarDC
61
62
63
class _TestBazPD(pydantic.BaseModel):
    # innermost level of the nested pydantic-model fixture (mirrors _TestBazDC)
    d: datetime
    e: float
    f: str
67
68
69
class _TestBarPD(pydantic.BaseModel):
    # middle level of the nested pydantic-model fixture; embeds _TestBazPD
    a: str
    b: int
    c: _TestBazPD
73
74
75
class _TestFooPD(pydantic.BaseModel):
    # outermost level of the nested pydantic-model fixture; embeds _TestBarPD
    x: int
    y: _TestBarPD
78
79
80
class _TestBazNT(NamedTuple):
    # innermost level of the nested NamedTuple fixture (mirrors _TestBazDC)
    d: datetime
    e: float
    f: str
84
85
86
class _TestBarNT(NamedTuple):
    # middle level of the nested NamedTuple fixture; embeds _TestBazNT
    a: str
    b: int
    c: _TestBazNT
90
91
92
class _TestFooNT(NamedTuple):
    # outermost level of the nested NamedTuple fixture; embeds _TestBarNT
    x: int
    y: _TestBarNT
95
96
97
# --------------------------------------------------------------------------------
98
99
100
def test_init_dict() -> None:
    """DataFrame init from dicts: empty, typed, mixed, temporal and nested data."""
    # Empty dictionary
    df = pl.DataFrame({})
    assert df.shape == (0, 0)

    # Empty dictionary/values
    df = pl.DataFrame({"a": [], "b": []})
    assert df.shape == (0, 2)
    assert df.schema == {"a": pl.Null, "b": pl.Null}

    # an explicit schema must win over inference for empty data
    for df in (
        pl.DataFrame({}, schema={"a": pl.Date, "b": pl.String}),
        pl.DataFrame({"a": [], "b": []}, schema={"a": pl.Date, "b": pl.String}),
    ):
        assert df.shape == (0, 2)
        assert df.schema == {"a": pl.Date, "b": pl.String}

    # List of empty list
    df = pl.DataFrame({"a": [[]], "b": [[]]})
    expected = {"a": pl.List(pl.Null), "b": pl.List(pl.Null)}
    assert df.schema == expected
    assert df.rows() == [([], [])]

    # Mixed dtypes
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    assert df.shape == (3, 2)
    assert df.columns == ["a", "b"]
    assert df.dtypes == [pl.Int64, pl.Float64]

    df = pl.DataFrame(
        data={"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]},
        schema=[("a", pl.Int8), ("b", pl.Float32)],
    )
    assert df.schema == {"a": pl.Int8, "b": pl.Float32}

    # Values contained in tuples
    df = pl.DataFrame({"a": (1, 2, 3), "b": [1.0, 2.0, 3.0]})
    assert df.shape == (3, 2)

    # Datetime/Date types (from both python and integer values)
    py_datetimes = (
        datetime(2022, 12, 31, 23, 59, 59),
        datetime(2022, 12, 31, 23, 59, 59),
    )
    py_dates = (date(2022, 12, 31), date(2022, 12, 31))
    # integer equivalents of the values above (us since epoch / days since epoch)
    int_datetimes = [1672531199000000, 1672531199000000]
    int_dates = [19357, 19357]

    for dates, datetimes, coldefs in (
        # test inferred and explicit (given both py/polars dtypes)
        (py_dates, py_datetimes, None),
        (py_dates, py_datetimes, [("dt", date), ("dtm", datetime)]),
        (py_dates, py_datetimes, [("dt", pl.Date), ("dtm", pl.Datetime)]),
        (int_dates, int_datetimes, [("dt", date), ("dtm", datetime)]),
        (int_dates, int_datetimes, [("dt", pl.Date), ("dtm", pl.Datetime)]),
    ):
        df = pl.DataFrame(
            data={"dt": dates, "dtm": datetimes},
            schema=coldefs,
        )
        assert df.schema == {"dt": pl.Date, "dtm": pl.Datetime("us")}
        assert df.rows() == list(zip(py_dates, py_datetimes, strict=True))

    # Overriding dict column names/types
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, schema=["c", "d"])
    assert df.columns == ["c", "d"]

    df = pl.DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6]},
        schema=["c", ("d", pl.Int8)],
    )  # partial type info (allowed, but mypy doesn't like it ;p)
    assert df.schema == {"c": pl.Int64, "d": pl.Int8}

    df = pl.DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6]}, schema=[("c", pl.Int8), ("d", pl.Int16)]
    )
    assert df.schema == {"c": pl.Int8, "d": pl.Int16}

    # empty nested objects round-trip unchanged through an Object column
    for empty_val in [None, "", {}, []]:  # type: ignore[var-annotated]
        test = [{"field": {"sub_field": empty_val, "sub_field_2": 2}}]
        df = pl.DataFrame(test, schema={"field": pl.Object})
        assert df["field"][0] == test[0]["field"]
183
184
185
def test_error_string_dtypes() -> None:
    """Raw strings are not accepted where a polars dtype is expected."""
    frame_data = {"x": [1, 2], "y": [3, 4], "z": [5, 6]}
    string_schema = {"x": "i16", "y": "i32", "z": "f32"}

    # string "dtypes" in a DataFrame schema must be rejected
    with pytest.raises(TypeError, match="cannot parse input"):
        pl.DataFrame(data=frame_data, schema=string_schema)  # type: ignore[arg-type]

    # ...and likewise for a Series dtype
    with pytest.raises(TypeError, match="cannot parse input"):
        pl.Series("n", [1, 2, 3], dtype="f32")  # type: ignore[arg-type]
194
195
196
def test_init_structured_objects() -> None:
    """Validate frame init from dataclass, namedtuple, and pydantic model rows."""

    @dataclasses.dataclass
    class TradeDC:
        timestamp: datetime
        ticker: str
        price: float
        size: int | None = None

    class TradePD(pydantic.BaseModel):
        timestamp: datetime
        ticker: str
        price: float
        size: int

    class TradeNT(NamedTuple):
        timestamp: datetime
        ticker: str
        price: float
        size: int | None = None

    raw_data = [
        (datetime(2022, 9, 8, 14, 30, 45), "AAPL", 157.5, 125),
        (datetime(2022, 9, 9, 10, 15, 12), "FLSY", 10.0, 1500),
        (datetime(2022, 9, 7, 15, 30), "MU", 55.5, 400),
    ]
    columns = ["timestamp", "ticker", "price", "size"]

    # all three row-object flavours should behave identically
    for TradeClass in (TradeDC, TradeNT, TradePD):
        trades = [
            TradeClass(**dict(zip(columns, values, strict=True)))  # type: ignore[arg-type]
            for values in raw_data
        ]

        for DF in (pl.DataFrame, pl.from_records):
            df = DF(data=trades)
            assert df.schema == {
                "timestamp": pl.Datetime("us"),
                "ticker": pl.String,
                "price": pl.Float64,
                "size": pl.Int64,
            }
            assert df.rows() == raw_data

            # partial dtypes override
            df = DF(
                data=trades,
                schema_overrides={"timestamp": pl.Datetime("ms"), "size": pl.Int32},
            )
            assert df.schema == {
                "timestamp": pl.Datetime("ms"),
                "ticker": pl.String,
                "price": pl.Float64,
                "size": pl.Int32,
            }

        # in conjunction with full 'columns' override (rename/downcast)
        df = pl.DataFrame(
            data=trades,
            schema=[
                ("ts", pl.Datetime("ms")),
                ("tk", pl.Categorical),
                ("pc", pl.Float64),
                ("sz", pl.UInt16),
            ],
        )
        assert df.schema == {
            "ts": pl.Datetime("ms"),
            "tk": pl.Categorical(),
            "pc": pl.Float64,
            "sz": pl.UInt16,
        }
        assert df.rows() == raw_data

    # cover a miscellaneous edge-case when detecting the annotations
    assert try_get_type_hints(obj=type(None)) == {}
272
273
274
def test_init_pydantic_2x() -> None:
    """Frame init from pydantic v2 models, incl. alias-driven field population."""

    class PageView(BaseModel):
        user_id: str
        # list-valued aliases exercise pydantic's alias handling during validation
        ts: datetime = Field(alias=["ts", "$date"])  # type: ignore[literal-required, call-overload]
        path: str = Field("?", alias=["url", "path"])  # type: ignore[literal-required, call-overload]
        referer: str = Field("?", alias="referer")
        event: Literal["leave", "enter"] = Field("enter")
        time_on_page: int = Field(0, serialization_alias="top")

    data_json = """
    [{
        "user_id": "x",
        "ts": {"$date": "2021-01-01T00:00:00.000Z"},
        "url": "/latest/foobar",
        "referer": "https://google.com",
        "event": "enter",
        "top": 123
    }]
    """
    adapter: TypeAdapter[Any] = TypeAdapter(list[PageView])
    models = adapter.validate_json(data_json)

    result = pl.DataFrame(models)
    expected = pl.DataFrame(
        {
            "user_id": ["x"],
            "ts": [datetime(2021, 1, 1, 0, 0)],
            "path": ["?"],
            "referer": ["https://google.com"],
            "event": ["enter"],
            "time_on_page": [0],
        }
    )
    assert_frame_equal(result, expected)
308
309
310
def test_init_structured_objects_unhashable() -> None:
    """Namedtuple rows with unhashable (dict) fields still load as struct columns."""

    class Test(NamedTuple):
        dt: datetime
        info: dict[str, int]

    test_data = [
        Test(datetime(2017, 1, 1), {"a": 1, "b": 2}),
        Test(datetime(2017, 1, 2), {"a": 2, "b": 2}),
    ]
    df = pl.DataFrame(test_data)
    # shape: (2, 2)
    # ┌─────────────────────┬───────────┐
    # │ dt                  ┆ info      │
    # │ ---                 ┆ ---       │
    # │ datetime[μs]        ┆ struct[2] │
    # ╞═════════════════════╪═══════════╡
    # │ 2017-01-01 00:00:00 ┆ {1,2}     │
    # │ 2017-01-02 00:00:00 ┆ {2,2}     │
    # └─────────────────────┴───────────┘
    assert df.schema == {
        "dt": pl.Datetime(time_unit="us", time_zone=None),
        "info": pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Int64)]),
    }
    assert df.rows() == test_data
336
337
338
@pytest.mark.parametrize(
    ("foo", "bar", "baz"),
    [
        (_TestFooDC, _TestBarDC, _TestBazDC),
        (_TestFooPD, _TestBarPD, _TestBazPD),
        (_TestFooNT, _TestBarNT, _TestBazNT),
    ],
)
def test_init_structured_objects_nested(foo: Any, bar: Any, baz: Any) -> None:
    """Nested dataclass/pydantic/namedtuple objects load as nested structs."""
    data = [
        foo(
            x=100,
            y=bar(
                a="hello",
                b=800,
                c=baz(d=datetime(2023, 4, 12, 10, 30), e=-10.5, f="world"),
            ),
        )
    ]
    df = pl.DataFrame(data)
    # shape: (1, 2)
    # ┌─────┬───────────────────────────────────┐
    # │ x   ┆ y                                 │
    # │ --- ┆ ---                               │
    # │ i64 ┆ struct[3]                         │
    # ╞═════╪═══════════════════════════════════╡
    # │ 100 ┆ {"hello",800,{2023-04-12 10:30:0… │
    # └─────┴───────────────────────────────────┘

    assert df.schema == {
        "x": pl.Int64,
        "y": pl.Struct(
            [
                pl.Field("a", pl.String),
                pl.Field("b", pl.Int64),
                pl.Field(
                    "c",
                    pl.Struct(
                        [
                            pl.Field("d", pl.Datetime("us")),
                            pl.Field("e", pl.Float64),
                            pl.Field("f", pl.String),
                        ]
                    ),
                ),
            ]
        ),
    }
    assert df.row(0) == (
        100,
        {
            "a": "hello",
            "b": 800,
            "c": {
                "d": datetime(2023, 4, 12, 10, 30),
                "e": -10.5,
                "f": "world",
            },
        },
    )

    # validate nested schema override
    override_struct_schema: dict[str, PolarsDataType] = {
        "x": pl.Int16,
        "y": pl.Struct(
            [
                pl.Field("a", pl.String),
                pl.Field("b", pl.Int32),
                pl.Field(
                    name="c",
                    dtype=pl.Struct(
                        [
                            pl.Field("d", pl.Datetime("ms")),
                            pl.Field("e", pl.Float32),
                            pl.Field("f", pl.String),
                        ]
                    ),
                ),
            ]
        ),
    }
    # the override should work identically via `schema` or `schema_overrides`
    for schema, schema_overrides in (
        (None, override_struct_schema),
        (override_struct_schema, None),
    ):
        df = (
            pl.DataFrame(data, schema=schema, schema_overrides=schema_overrides)
            .unnest("y")
            .unnest("c")
        )
        # shape: (1, 6)
        # ┌─────┬───────┬─────┬─────────────────────┬───────┬───────┐
        # │ x   ┆ a     ┆ b   ┆ d                   ┆ e     ┆ f     │
        # │ --- ┆ ---   ┆ --- ┆ ---                 ┆ ---   ┆ ---   │
        # │ i16 ┆ str   ┆ i32 ┆ datetime[ms]        ┆ f32   ┆ str   │
        # ╞═════╪═══════╪═════╪═════════════════════╪═══════╪═══════╡
        # │ 100 ┆ hello ┆ 800 ┆ 2023-04-12 10:30:00 ┆ -10.5 ┆ world │
        # └─────┴───────┴─────┴─────────────────────┴───────┴───────┘
        assert df.schema == {
            "x": pl.Int16,
            "a": pl.String,
            "b": pl.Int32,
            "d": pl.Datetime("ms"),
            "e": pl.Float32,
            "f": pl.String,
        }
        assert df.row(0) == (
            100,
            "hello",
            800,
            datetime(2023, 4, 12, 10, 30),
            -10.5,
            "world",
        )
452
453
454
def test_dataclasses_initvar_typing() -> None:
    """InitVar annotations parse cleanly but never become frame columns."""

    @dataclasses.dataclass
    class ABC:
        x: date
        y: float
        z: dataclasses.InitVar[list[str]] = None

    # the InitVar annotation must not break type-hint resolution...
    record = ABC(x=date(1999, 12, 31), y=100.0)
    frame = pl.DataFrame([record])

    # ...and the init-only pseudo-field must be absent from the loaded row
    assert dataclasses.asdict(record) == frame.rows(named=True)[0]
467
468
469
@pytest.mark.parametrize(
    "nt",
    [
        namedtuple("TestData", ["id", "info"]),  # noqa: PYI024
        NamedTuple("TestData", [("id", int), ("info", str)]),
    ],
)
def test_collections_namedtuple(nt: type) -> None:
    """collections.namedtuple and typing.NamedTuple rows load identically."""
    nt_data = [nt(1, "a"), nt(2, "b"), nt(3, "c")]

    # as top-level rows
    result = pl.DataFrame(nt_data)
    expected = pl.DataFrame({"id": [1, 2, 3], "info": ["a", "b", "c"]})
    assert_frame_equal(result, expected)

    # as values nested inside a column (loaded as structs)
    result = pl.DataFrame({"data": nt_data, "misc": ["x", "y", "z"]})
    expected = pl.DataFrame(
        {
            "data": [
                {"id": 1, "info": "a"},
                {"id": 2, "info": "b"},
                {"id": 3, "info": "c"},
            ],
            "misc": ["x", "y", "z"],
        }
    )
    assert_frame_equal(result, expected)
495
496
497
def test_init_ndarray() -> None:
    """DataFrame init from numpy ndarrays: shapes, orientations, and round-trip."""
    # Empty array
    df = pl.DataFrame(np.array([]))
    assert_frame_equal(df, pl.DataFrame())

    # 1D array
    df = pl.DataFrame(np.array([1, 2, 3], dtype=np.int64), schema=["a"])
    expected = pl.DataFrame({"a": [1, 2, 3]})
    assert_frame_equal(df, expected)

    df = pl.DataFrame(np.array([1, 2, 3]), schema=[("a", pl.Int32)])
    expected = pl.DataFrame({"a": [1, 2, 3]}).with_columns(pl.col("a").cast(pl.Int32))
    assert_frame_equal(df, expected)

    # 2D array (or 2x 1D array) - should default to column orientation (if C-contiguous)
    for data in (
        np.array([[1, 2], [3, 4]], dtype=np.int64),
        [np.array([1, 2], dtype=np.int64), np.array([3, 4], dtype=np.int64)],
    ):
        df = pl.DataFrame(data, orient="col")
        expected = pl.DataFrame({"column_0": [1, 2], "column_1": [3, 4]})
        assert_frame_equal(df, expected)

    df = pl.DataFrame([[1, 2.0, "a"], [None, None, None]], orient="row")
    expected = pl.DataFrame(
        {"column_0": [1, None], "column_1": [2.0, None], "column_2": ["a", None]}
    )
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        data=[[1, 2.0, "a"], [None, None, None]],
        schema=[("x", pl.Boolean), ("y", pl.Int32), "z"],
        orient="row",
    )
    assert df.rows() == [(True, 2, "a"), (None, None, None)]
    assert df.schema == {"x": pl.Boolean, "y": pl.Int32, "z": pl.String}

    # 2D array - default to column orientation
    df = pl.DataFrame(np.array([[1, 2], [3, 4]], dtype=np.int64))
    expected = pl.DataFrame({"column_0": [1, 3], "column_1": [2, 4]})
    assert_frame_equal(df, expected)

    # no orientation, numpy convention
    df = pl.DataFrame(np.ones((3, 1), dtype=np.int64))
    assert df.shape == (3, 1)

    # 2D array - row orientation inferred (3 names for a 2x3 array)
    df = pl.DataFrame(
        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64), schema=["a", "b", "c"]
    )
    expected = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
    assert_frame_equal(df, expected)

    # 2D array - column orientation inferred (2 names for a 2x3 array)
    df = pl.DataFrame(
        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64), schema=["a", "b"]
    )
    expected = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    assert_frame_equal(df, expected)

    # List column from 2D array with single-column schema
    df = pl.DataFrame(np.arange(4).reshape(-1, 1).astype(np.int64), schema=["a"])
    assert_frame_equal(df, pl.DataFrame({"a": [0, 1, 2, 3]}))
    assert np.array_equal(df.to_numpy(), np.arange(4).reshape(-1, 1).astype(np.int64))

    df = pl.DataFrame(np.arange(4).reshape(-1, 2).astype(np.int64), schema=["a"])
    assert_frame_equal(
        df,
        pl.DataFrame(
            {"a": [[0, 1], [2, 3]]}, schema={"a": pl.Array(pl.Int64, shape=2)}
        ),
    )

    # 2D numpy arrays nested inside a dict become Array columns
    df = pl.DataFrame({"a": np.arange(5, dtype=np.int64).reshape(1, -1)})
    assert df.dtypes == [pl.Array(pl.Int64, shape=5)]
    assert df.shape == (1, 1)

    df = pl.DataFrame({"a": np.arange(10, dtype=np.int64).reshape(2, -1)})
    assert df.dtypes == [pl.Array(pl.Int64, shape=5)]
    assert df.shape == (2, 1)
    assert df.rows() == [([0, 1, 2, 3, 4],), ([5, 6, 7, 8, 9],)]

    test_rows = [(1, 2), (3, 4)]
    df = pl.DataFrame([np.array(test_rows[0]), np.array(test_rows[1])], orient="row")
    expected = pl.DataFrame(test_rows, orient="row")
    assert_frame_equal(df, expected)

    # round trip export/init
    for shape in ((4, 4), (4, 8), (8, 4)):
        np_ones = np.ones(shape=shape, dtype=np.float64)
        names = [f"c{i}" for i in range(shape[1])]

        df = pl.DataFrame(np_ones, schema=names)
        assert_frame_equal(df, pl.DataFrame(np.asarray(df), schema=names))
592
593
594
def test_init_ndarray_errors() -> None:
    """Invalid ndarray/schema/orient combinations must raise ValueError."""
    arr_2x3 = np.array([[1, 2, 3], [4, 5, 6]])

    # row orientation conflicts with a two-name schema for a 2x3 array
    with pytest.raises(ValueError):
        pl.DataFrame(arr_2x3, schema=["a", "b"], orient="row")

    # ...and likewise when the schema carries explicit dtypes
    with pytest.raises(ValueError):
        pl.DataFrame(
            arr_2x3,
            schema=[("a", pl.UInt32), ("b", pl.UInt32)],
            orient="row",
        )

    # unknown orient string
    with pytest.raises(ValueError):
        pl.DataFrame(arr_2x3, orient="wrong")  # type: ignore[arg-type]

    # schema length does not match the data dimensions
    with pytest.raises(ValueError):
        _ = pl.DataFrame(np.array([1, 2, 3]), schema=[])

    # 3D input is unsupported
    with pytest.raises(ValueError):
        _ = pl.DataFrame(np.random.randn(2, 2, 2))
620
621
622
def test_init_ndarray_nan() -> None:
    """NaN values in numpy input, with and without `nan_to_null` conversion."""
    # numpy arrays containing NaN
    df0 = pl.DataFrame(
        data={"x": [1.0, 2.5, float("nan")], "y": [4.0, float("nan"), 6.5]},
    )
    df1 = pl.DataFrame(
        data={"x": np.array([1.0, 2.5, np.nan]), "y": np.array([4.0, np.nan, 6.5])},
    )
    # same data, but NaN converted to null on ingest
    df2 = pl.DataFrame(
        data={"x": np.array([1.0, 2.5, np.nan]), "y": np.array([4.0, np.nan, 6.5])},
        nan_to_null=True,
    )
    assert_frame_equal(df0, df1)
    assert df2.rows() == [(1.0, 4.0), (2.5, None), (None, 6.5)]

    s0 = pl.Series("n", [1.0, 2.5, float("nan")])
    s1 = pl.Series("n", np.array([1.0, 2.5, float("nan")]))
    s2 = pl.Series("n", np.array([1.0, 2.5, float("nan")]), nan_to_null=True)

    assert_series_equal(s0, s1)
    assert s2.to_list() == [1.0, 2.5, None]
643
644
645
def test_init_ndarray_square() -> None:
    """Orientation tie-breaking for ambiguous (square) 2D numpy arrays."""
    # 2D square array; ensure that we maintain convention
    # (first axis = rows) with/without an explicit schema
    arr = np.arange(4).reshape(2, 2)
    assert (
        [(0, 1), (2, 3)]
        == pl.DataFrame(arr).rows()
        == pl.DataFrame(arr, schema=["a", "b"]).rows()
    )
    # check that we tie-break square arrays using fortran vs c-contiguous row/col major
    df_c = pl.DataFrame(
        data=np.array([[1, 2], [3, 4]], dtype=np.int64, order="C"),
        schema=["x", "y"],
    )
    assert_frame_equal(df_c, pl.DataFrame({"x": [1, 3], "y": [2, 4]}))

    df_f = pl.DataFrame(
        data=np.array([[1, 2], [3, 4]], dtype=np.int64, order="F"),
        schema=["x", "y"],
    )
    assert_frame_equal(df_f, pl.DataFrame({"x": [1, 2], "y": [3, 4]}))
666
667
668
def test_init_numpy_unavailable(monkeypatch: Any) -> None:
    """When numpy support is not detected, ndarray input must raise TypeError."""
    # simulate numpy being unavailable to the frame constructor
    monkeypatch.setattr(pl.dataframe.frame, "_check_for_numpy", lambda x: False)
    with pytest.raises(TypeError):
        pl.DataFrame(np.array([1, 2, 3]), schema=["a"])
672
673
674
def test_init_numpy_scalars() -> None:
    """Numpy scalar values map onto the equivalent polars dtypes."""
    df = pl.DataFrame(
        {
            "bool": [np.bool_(True), np.bool_(False)],
            "i8": [np.int8(16), np.int8(64)],
            "u32": [np.uint32(1234), np.uint32(9876)],
        }
    )
    df_expected = pl.from_records(
        data=[(True, 16, 1234), (False, 64, 9876)],
        schema=OrderedDict([("bool", pl.Boolean), ("i8", pl.Int8), ("u32", pl.UInt32)]),
        orient="row",
    )
    assert_frame_equal(df, df_expected)
688
689
690
def test_null_array_print_format() -> None:
    """All-null arrow columns keep the Null dtype and print as 'null'."""
    pa_tbl_null = pa.table({"a": [None, None]})
    df_null = pl.from_arrow(pa_tbl_null)
    assert df_null.shape == (2, 1)
    assert df_null.dtypes == [pl.Null]  # type: ignore[union-attr]
    assert df_null.rows() == [(None,), (None,)]  # type: ignore[union-attr]

    # exact repr, including the box-drawing frame
    assert (
        str(df_null) == "shape: (2, 1)\n"
        "┌──────┐\n"
        "│ a    │\n"
        "│ ---  │\n"
        "│ null │\n"
        "╞══════╡\n"
        "│ null │\n"
        "│ null │\n"
        "└──────┘"
    )
708
709
710
def test_init_arrow() -> None:
    """DataFrame init from pyarrow tables, incl. renames and dtype overrides."""
    # Handle unnamed column (None column name becomes the string "None")
    df = pl.DataFrame(pa.table({"a": [1, 2], None: [3, 4]}))
    expected = pl.DataFrame({"a": [1, 2], "None": [3, 4]})
    assert_frame_equal(df, expected)

    # Rename columns
    df = pl.DataFrame(pa.table({"a": [1, 2], "b": [3, 4]}), schema=["c", "d"])
    expected = pl.DataFrame({"c": [1, 2], "d": [3, 4]})
    assert_frame_equal(df, expected)

    # rename and re-type in one pass
    df = pl.DataFrame(
        pa.table({"a": [1, 2], None: [3, 4]}),
        schema=[("c", pl.Int32), ("d", pl.Float32)],
    )
    assert df.schema == {"c": pl.Int32, "d": pl.Float32}
    assert df.rows() == [(1, 3.0), (2, 4.0)]

    # Bad columns argument
    with pytest.raises(ValueError):
        pl.DataFrame(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}), schema=["c", "d", "e"])
731
732
733
def test_init_arrow_dupes() -> None:
    """Duplicate field names in an arrow table raise DuplicateError."""
    tbl = pa.Table.from_arrays(
        arrays=[
            pa.array([1, 2, 3], type=pa.int32()),
            pa.array([4, 5, 6], type=pa.int32()),
            pa.array(
                [7, 8, 9], type=pa.decimal128(38, 10)
            ),  # included as this triggers a panic during construction alongside duplicate fields
        ],
        schema=pa.schema(
            [("col", pa.int32()), ("col", pa.int32()), ("col3", pa.decimal128(38, 10))]
        ),
    )
    with pytest.raises(
        DuplicateError,
        match=r"""column appears more than once; names must be unique: \["col"\]""",
    ):
        pl.DataFrame(tbl)
751
752
753
def test_init_from_frame() -> None:
    """Init of DataFrame/Series from an existing DataFrame, with overrides."""
    df1 = pl.DataFrame({"id": [0, 1], "misc": ["a", "b"], "val": [-10, 10]})
    assert_frame_equal(df1, pl.DataFrame(df1))

    # rename columns via schema
    df2 = pl.DataFrame(df1, schema=["a", "b", "c"])
    assert_frame_equal(df2, pl.DataFrame(df2))

    # rename + downcast one column (override keyed on the ORIGINAL name)
    df3 = pl.DataFrame(df1, schema=["a", "b", "c"], schema_overrides={"val": pl.Int8})
    assert_frame_equal(df3, pl.DataFrame(df3))

    assert df1.schema == {"id": pl.Int64, "misc": pl.String, "val": pl.Int64}
    assert df2.schema == {"a": pl.Int64, "b": pl.String, "c": pl.Int64}
    assert df3.schema == {"a": pl.Int64, "b": pl.String, "c": pl.Int8}
    assert df1.rows() == df2.rows() == df3.rows()

    # Series init from a frame; name defaults to "" when not given
    s1 = pl.Series("s", df3)
    s2 = pl.Series(df3)

    assert s1.name == "s"
    assert s2.name == ""
773
774
775
def test_init_series() -> None:
    """DataFrame/Series init from Series inputs, named and unnamed."""
    # List of Series
    df = pl.DataFrame([pl.Series("a", [1, 2, 3]), pl.Series("b", [4, 5, 6])])
    expected = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    assert_frame_equal(df, expected)

    # Tuple of Series
    df = pl.DataFrame((pl.Series("a", (1, 2, 3)), pl.Series("b", (4, 5, 6))))
    assert_frame_equal(df, expected)

    # schema overrides both the names and the dtypes
    df = pl.DataFrame(
        (pl.Series("a", (1, 2, 3)), pl.Series("b", (4, 5, 6))),
        schema=[("x", pl.Float64), ("y", pl.Float64)],
    )
    assert df.schema == {"x": pl.Float64, "y": pl.Float64}
    assert df.rows() == [(1.0, 4.0), (2.0, 5.0), (3.0, 6.0)]

    # List of unnamed Series (auto-named column_0, column_1, ...)
    df = pl.DataFrame([pl.Series([1, 2, 3]), pl.Series([4, 5, 6])])
    col0 = pl.Series("column_0", [1, 2, 3])
    col1 = pl.Series("column_1", [4, 5, 6])
    expected = pl.DataFrame([col0, col1])
    assert_frame_equal(df, expected)

    df = pl.DataFrame([pl.Series([0.0]), pl.Series([1.0])])
    assert df.schema == {"column_0": pl.Float64, "column_1": pl.Float64}
    assert df.rows() == [(0.0, 1.0)]

    df = pl.DataFrame(
        [pl.Series([None]), pl.Series([1.0])],
        schema=[("x", pl.Date), ("y", pl.Boolean)],
    )
    assert df.schema == {"x": pl.Date, "y": pl.Boolean}
    assert df.rows() == [(None, True)]

    # Single Series
    df = pl.DataFrame(pl.Series("a", [1, 2, 3]))
    expected = pl.DataFrame({"a": [1, 2, 3]})
    assert df.schema == {"a": pl.Int64}
    assert_frame_equal(df, expected)

    df = pl.DataFrame(pl.Series("a", [1, 2, 3]), schema=[("a", pl.UInt32)])
    assert df.rows() == [(1,), (2,), (3,)]
    assert df.schema == {"a": pl.UInt32}

    # nested list, with/without explicit dtype
    s1 = pl.Series([[[2, 2]]])
    assert s1.dtype == pl.List(pl.List(pl.Int64))

    s2 = pl.Series([[[2, 2]]], dtype=pl.List(pl.List(pl.UInt8)))
    assert s2.dtype == pl.List(pl.List(pl.UInt8))

    # empty Series with a nested dtype; name/dtype survive a Series->Series init
    nested_dtype = pl.List(pl.List(pl.UInt8))
    s3 = pl.Series("x", dtype=nested_dtype)
    s4 = pl.Series(s3)
    for s in (s3, s4):
        assert s.dtype == nested_dtype
        assert s.to_list() == []
        assert s.name == "x"

    # Series init from a single-column frame, with dtype cast
    s5 = pl.Series("", df, dtype=pl.Int8)
    assert_series_equal(s5, pl.Series("", [1, 2, 3], dtype=pl.Int8))
837
838
839
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        (int, pl.Int64),
        (bytes, pl.Binary),
        (float, pl.Float64),
        (str, pl.String),
        (date, pl.Date),
        (time, pl.Time),
        (datetime, pl.Datetime("us")),
        (timedelta, pl.Duration("us")),
        (Decimal, pl.Decimal(scale=0)),
    ],
)
def test_init_py_dtype(dtype: Any, expected_dtype: PolarsDataType) -> None:
    """Plain python types given as dtypes resolve to the matching polars dtype."""
    # both an all-null column and an empty column must resolve identically
    for values in ([None], []):
        series = pl.Series("s", values, dtype=dtype)
        assert series.dtype == expected_dtype

        frame = pl.DataFrame({"col": values}, schema={"col": dtype})
        assert frame.schema == {"col": expected_dtype}
865
866
867
def test_init_py_dtype_misc_float() -> None:
    """The builtin `float` type acts as an alias for pl.Float64."""
    int_series = pl.Series([100], dtype=float)  # type: ignore[arg-type]
    assert int_series.dtype == pl.Float64

    # mixed float/int/null values are all coerced to Float64
    frame = pl.DataFrame(
        {"x": [100.0], "y": [200], "z": [None]},
        schema=dict.fromkeys(("x", "y", "z"), float),
    )
    assert frame.schema == dict.fromkeys(("x", "y", "z"), pl.Float64)
    assert frame.rows() == [(100.0, 200.0, None)]
876
877
878
def test_init_seq_of_seq() -> None:
    """DataFrame init from sequences of sequences, in both orientations."""
    # List of lists
    df = pl.DataFrame([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"], orient="row")
    expected = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
    assert_frame_equal(df, expected)

    # row orientation with explicit dtypes
    df = pl.DataFrame(
        [[1, 2, 3], [4, 5, 6]],
        schema=[("a", pl.Int8), ("b", pl.Int16), ("c", pl.Int32)],
        orient="row",
    )
    assert df.schema == {"a": pl.Int8, "b": pl.Int16, "c": pl.Int32}
    assert df.rows() == [(1, 2, 3), (4, 5, 6)]

    # Tuple of tuples, default to column orientation
    df = pl.DataFrame(((1, 2, 3), (4, 5, 6)))
    expected = pl.DataFrame({"column_0": [1, 2, 3], "column_1": [4, 5, 6]})
    assert_frame_equal(df, expected)

    # Row orientation
    df = pl.DataFrame(((1, 2), (3, 4)), schema=("a", "b"), orient="row")
    expected = pl.DataFrame({"a": [1, 3], "b": [2, 4]})
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        ((1, 2), (3, 4)), schema=(("a", pl.Float32), ("b", pl.Float32)), orient="row"
    )
    assert df.schema == {"a": pl.Float32, "b": pl.Float32}
    assert df.rows() == [(1.0, 2.0), (3.0, 4.0)]

    # Wrong orient value
    with pytest.raises(ValueError):
        df = pl.DataFrame(((1, 2), (3, 4)), orient="wrong")  # type: ignore[arg-type]
911
912
913
def test_init_1d_sequence() -> None:
    """DataFrame init from 1D sequences, incl. tz-aware datetime inputs."""
    # Empty list
    df = pl.DataFrame([])
    assert_frame_equal(df, pl.DataFrame())

    # List/array of strings
    data = ["a", "b", "c"]
    for a in (data, np.array(data)):
        df = pl.DataFrame(a, schema=["s"])
        expected = pl.DataFrame({"s": data})
        assert_frame_equal(df, expected)

    # boolean values cast to the requested integer dtype
    df = pl.DataFrame([None, True, False], schema=[("xx", pl.Int8)])
    assert df.schema == {"xx": pl.Int8}
    assert df.rows() == [(None,), (1,), (0,)]

    # String sequence (iterated per character)
    result = pl.DataFrame("abc", schema=["s"])
    expected = pl.DataFrame({"s": ["a", "b", "c"]})
    assert_frame_equal(result, expected)

    # datetimes sequence; any tzinfo on the values propagates into the dtype
    df = pl.DataFrame([datetime(2020, 1, 1)], schema={"ts": pl.Datetime("ms")})
    assert df.schema == {"ts": pl.Datetime("ms")}
    df = pl.DataFrame(
        [datetime(2020, 1, 1, tzinfo=timezone.utc)], schema={"ts": pl.Datetime("ms")}
    )
    assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
    # fixed-offset timezones also resolve to "UTC"
    df = pl.DataFrame(
        [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
        schema={"ts": pl.Datetime("ms")},
    )
    assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
    df = pl.DataFrame(
        [datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
        schema={"ts": pl.Datetime("ms")},
    )
    assert df.schema == {"ts": pl.Datetime("ms", "Asia/Kathmandu")}
951
952
953
def test_init_pandas(monkeypatch: Any) -> None:
    """DataFrame init from pandas objects, incl. subclassed Series and fallback."""
    pandas_df = pd.DataFrame([[1, 2], [3, 4]], columns=[1, 2])

    # integer column names (stringified on conversion)
    df = pl.DataFrame(pandas_df)
    expected = pl.DataFrame({"1": [1, 3], "2": [2, 4]})
    assert_frame_equal(df, expected)
    assert df.schema == {"1": pl.Int64, "2": pl.Int64}

    # override column names, types
    df = pl.DataFrame(pandas_df, schema=[("x", pl.Float64), ("y", pl.Float64)])
    assert df.schema == {"x": pl.Float64, "y": pl.Float64}
    assert df.rows() == [(1.0, 2.0), (3.0, 4.0)]

    # subclassed pandas object, with/without data & overrides
    # type error fixed in pandas-stubs 2.3.0.250703, which doesn't support Python3.9
    class XSeries(pd.Series):  # type: ignore[type-arg, unused-ignore]
        @property
        def _constructor(self) -> type:
            return XSeries

    # empty subclassed series still carry their numpy dtypes across
    df = pl.DataFrame(
        data=[
            XSeries(name="x", data=[], dtype=np.dtype("<M8[ns]")),
            XSeries(name="y", data=[], dtype=np.dtype("f8")),
            XSeries(name="z", data=[], dtype=np.dtype("?")),
        ],
    )
    assert df.schema == {"x": pl.Datetime("ns"), "y": pl.Float64, "z": pl.Boolean}
    assert df.rows() == []

    df = pl.DataFrame(
        data=[
            XSeries(
                name="x",
                data=[datetime(2022, 10, 31, 10, 30, 45, 123456)],
                dtype=np.dtype("<M8[ns]"),
            )
        ],
        schema={"colx": pl.Datetime("us")},
    )
    assert df.schema == {"colx": pl.Datetime("us")}
    assert df.rows() == [(datetime(2022, 10, 31, 10, 30, 45, 123456),)]

    # pandas is not available
    monkeypatch.setattr(pl.dataframe.frame, "_check_for_pandas", lambda x: False)

    # pandas 2.2 and higher implement the Arrow PyCapsule Interface, so the constructor
    # will still work even without using pandas APIs
    if parse_version(pd.__version__) >= parse_version("2.2.0"):
        df = pl.DataFrame(pandas_df)
        assert_frame_equal(df, expected)
    else:
        with pytest.raises(TypeError):
            pl.DataFrame(pandas_df)
1009
1010
1011
def test_init_errors() -> None:
    """Mismatched shapes and unsupported inputs raise the expected errors."""
    failing_cases = [
        # column lengths differ
        (ShapeError, lambda: pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0, 4.0]})),
        # schema names more names than the data has columns
        (ShapeError, lambda: pl.DataFrame([[1, 2], [3, 4]], schema=["a", "b", "c"])),
        # a bare scalar is not a supported data source
        (TypeError, lambda: pl.DataFrame(0)),
    ]
    for exc, construct in failing_cases:
        with pytest.raises(exc):
            construct()
1023
1024
1025
def test_init_records() -> None:
    """Row dicts construct a frame; a schema can select/rename/add columns."""
    records = [
        {"a": 1, "b": 2},
        {"b": 1, "a": 2},
        {"a": 1, "b": 2},
    ]
    df = pl.DataFrame(records)
    assert_frame_equal(df, pl.DataFrame({"a": [1, 2, 1], "b": [2, 1, 2]}))
    assert df.to_dicts() == records

    # declared columns not present in the data come back as all-null
    df_cd = pl.DataFrame(records, schema=["a", "c", "d"])
    assert df_cd.to_dict(as_series=False) == {
        "a": [1, 2, 1],
        "c": [None] * 3,
        "d": [None] * 3,
    }

    data = {"a": 1, "b": 2, "c": 3}

    df1 = pl.from_dicts([data])
    assert df1.columns == ["a", "b", "c"]

    # column names are assignable after construction
    df1.columns = ["x", "y", "z"]
    assert df1.columns == ["x", "y", "z"]

    # schema determines column order
    df2 = pl.from_dicts([data], schema=["c", "b", "a"])
    assert df2.columns == ["c", "b", "a"]

    # a single-column schema pulls just that key out of each record
    for key in ("c", "b", "a"):
        single = pl.from_dicts([data], schema=[key])
        assert single.to_dict(as_series=False) == {key: [data[key]]}
def test_init_records_schema_order() -> None:
    """Field values are loaded according to the declared schema order."""
    cols: list[str] = ["a", "b", "c", "d"]
    data: list[dict[str, int]] = [
        {"c": 3, "b": 2, "a": 1},
        {"b": 2, "d": 4},
        {},
        {"a": 1, "b": 2, "c": 3},
        {"d": 4, "b": 2, "a": 1},
        {"c": 3, "b": 2},
    ]
    lookup = {"a": 1, "b": 2, "c": 3, "d": 4, "e": None}

    def _assert_values_match(frame: pl.DataFrame) -> None:
        # every cell is either null or the canonical value for its column
        for name in frame.columns:
            assert all(v in (None, lookup[name]) for v in frame[name].to_list())

    for ctor in (pl.from_dicts, pl.DataFrame):
        # randomise both record order and declared column order
        for _ in range(8):
            shuffle(data)
            shuffle(cols)
            _assert_values_match(ctor(data, schema=cols))

        # have schema override inferred types, omit some columns, add a new one
        schema = {"a": pl.Int8, "c": pl.Int16, "e": pl.Int32}
        frame = ctor(data, schema=schema)
        assert frame.schema == schema
        _assert_values_match(frame)
def test_init_only_columns() -> None:
    """A schema alone (no data) yields an empty frame with the right dtypes."""
    df = pl.DataFrame(schema=["a", "b", "c"])
    assert_frame_equal(df, pl.DataFrame({"a": [], "b": [], "c": []}))

    # construction should behave identically for every flavour of "no data"
    empty_data: Any
    for empty_data in (None, {}, []):
        df = pl.DataFrame(
            data=empty_data,
            schema=[
                ("a", pl.Date),
                ("b", pl.UInt64),
                ("c", pl.Int8),
                ("d", pl.List(pl.UInt8)),
            ],
        )
        expected = pl.DataFrame({"a": [], "b": [], "c": []}).with_columns(
            pl.col("a").cast(pl.Date),
            pl.col("b").cast(pl.UInt64),
            pl.col("c").cast(pl.Int8),
        )
        expected.insert_column(3, pl.Series("d", [], pl.List(pl.UInt8)))

        assert df.shape == (0, 4)
        assert_frame_equal(df, expected)
        assert df.dtypes == [pl.Date, pl.UInt64, pl.Int8, pl.List]
        assert pl.List(pl.UInt8).is_(df.schema["d"])

        if TYPE_CHECKING:
            assert_type(pl.List(pl.UInt8).is_(df.schema["d"]), bool)

        # clearing an already-empty frame preserves schema and shape
        cleared = df.clear()
        assert len(cleared) == 0
        assert df.schema == cleared.schema
        assert cleared.shape == df.shape
def test_from_dicts_list_without_dtype() -> None:
    """List values infer their inner dtype even when some entries are null."""
    frame = pl.from_dicts(
        [{"id": 1, "hint": ["some_text_here"]}, {"id": 2, "hint": [None]}]
    )
    assert_frame_equal(
        frame, pl.DataFrame({"id": [1, 2], "hint": [["some_text_here"], [None]]})
    )
def test_from_dicts_list_struct_without_inner_dtype() -> None:
    """List-of-struct columns round-trip, including rows whose fields are all null."""
    payload = {
        "users": [
            [{"category": "A"}, {"category": "B"}],
            [{"category": None}, {"category": None}],
        ],
        "days_of_week": [1, 2],
    }
    # the constructed frame must reproduce the input exactly
    assert pl.DataFrame(payload).to_dict(as_series=False) == payload
def test_from_dicts_list_struct_without_inner_dtype_5611() -> None:
    """Regression test (issue 5611): empty list preceding a list-of-struct entry."""
    rows = [
        {"a": []},
        {"a": [{"b": 1}]},
    ]
    assert_frame_equal(pl.from_dicts(rows), pl.DataFrame({"a": [[], [{"b": 1}]]}))
def test_from_dict_upcast_primitive() -> None:
    """With strict=False, mixed int/float columns upcast to Float64."""
    frame = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]}, strict=False)
    assert frame.dtypes == [pl.Float64, pl.Float64]
def test_u64_lit_5031() -> None:
    """Regression test (issue 5031): comparing a u64 column to a huge literal."""
    df = pl.DataFrame({"foo": [1, 2, 3]}).with_columns(pl.col("foo").cast(pl.UInt64))
    filtered = df.filter(pl.col("foo") < (1 << 64) - 20)
    assert filtered.shape == (3, 1)
    assert df["foo"].to_list() == [1, 2, 3]
def test_from_dicts_missing_columns() -> None:
    """Missing keys become nulls; a partial schema loads only the declared keys."""
    # missing columns from some of the data dicts
    rows = [{"a": 1}, {"b": 2}]
    assert_frame_equal(
        pl.from_dicts(rows), pl.DataFrame({"a": [1, None], "b": [None, 2]})
    )

    # partial schema with some columns missing; only load the declared keys
    rows = [{"a": 1, "b": 2}]
    assert_frame_equal(pl.from_dicts(rows, schema=["a"]), pl.DataFrame({"a": [1]}))
def test_from_dicts_schema_columns_do_not_match() -> None:
    """A schema column absent from the data materialises as a null column."""
    frame = pl.from_dicts([{"a": 1, "b": 2}], schema=["x"])
    assert_frame_equal(frame, pl.DataFrame({"x": [None]}))
def test_from_dicts_infer_integer_types() -> None:
    """Integers infer as i64 unless too large (then i128/u128); 2**128 overflows."""
    row = {
        "a": 2**7 - 1,
        "b": 2**15 - 1,
        "c": 2**31 - 1,
        "d": 2**63 - 1,
        "e": 2**127 - 1,
        "f": 2**128 - 1,
    }
    # all values inferred as i64 except for values too large for i64
    expected = {
        **dict.fromkeys("abcd", pl.Int64),
        "e": pl.Int128,
        "f": pl.UInt128,
    }
    assert pl.from_dicts([row]).schema == expected

    # beyond u128 there is nowhere left to go
    with pytest.raises(OverflowError):
        pl.from_dicts([{"too_big": 2**128}])
def test_from_dicts_list_large_int_17006() -> None:
    """Regression test (issue 17006): u64-max values inside List/Array columns."""
    data = [{"x": [2**64 - 1]}]

    for dtype in (pl.List(pl.UInt64), pl.Array(pl.UInt64, 1)):
        result = pl.from_dicts(data, schema={"x": dtype})
        expected = pl.DataFrame({"x": [[2**64 - 1]]}, schema={"x": dtype})
        assert_frame_equal(result, expected)
def test_from_rows_dtype() -> None:
    """Explicit schema dtypes win over inference for row-oriented input.

    Covers issues 5182 and 5266 (values appearing only after the default
    inference window of 50 rows) and storing a dataclass as a single
    Object value.
    """
    # 50 is the default inference length
    # 5182
    df = pl.DataFrame(
        data=[(None, None)] * 50 + [("1.23", None)],
        schema=[("foo", pl.String), ("bar", pl.String)],
        orient="row",
    )
    assert df.dtypes == [pl.String, pl.String]
    assert df.null_count().row(0) == (50, 51)

    type1 = [{"c1": 206, "c2": "type1", "c3": {"x1": "abcd", "x2": "jkl;"}}]
    type2 = [
        {"c1": 208, "c2": "type2", "c3": {"a1": "abcd", "a2": "jkl;", "a3": "qwerty"}}
    ]

    df = pl.DataFrame(
        data=type1 * 50 + type2,
        schema=[("c1", pl.Int32), ("c2", pl.Object), ("c3", pl.Object)],
    )
    assert df.dtypes == [pl.Int32, pl.Object, pl.Object]

    # 50 is the default inference length
    # 5266
    type1 = [{"c1": 206, "c2": "type1", "c3": {"x1": "abcd", "x2": "jkl;"}}]
    type2 = [
        {"c1": 208, "c2": "type2", "c3": {"a1": "abcd", "a2": "jkl;", "a3": "qwerty"}}
    ]

    df = pl.DataFrame(
        data=type1 * 50 + type2,
        schema=[("c1", pl.Int32), ("c2", pl.Object), ("c3", pl.Object)],
    )
    assert df.dtypes == [pl.Int32, pl.Object, pl.Object]
    assert df.null_count().row(0) == (0, 0, 0)

    # a dataclass instance can be held as a single Object cell
    dc = _TestBazDC(d=datetime(2020, 2, 22), e=42.0, f="xyz")
    df = pl.DataFrame([[dc]], schema={"d": pl.Object})
    assert df.schema == {"d": pl.Object}
    assert df.item() == dc
def test_from_dicts_schema() -> None:
    """Schema / schema_overrides hints can extend or refine inferred dtypes."""
    data = [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}]

    # let polars infer the dtypes, but inform it about a 3rd column.
    # three equivalent spellings of the same hint are exercised here.
    for schema, overrides in (
        ({"a": pl.Unknown, "b": pl.Unknown, "c": pl.Int32}, None),
        ({"a": None, "b": None, "c": None}, {"c": pl.Int32}),
        (["a", "b", ("c", pl.Int32)], None),
    ):
        df = pl.from_dicts(
            data,
            schema=schema,  # type: ignore[arg-type]
            schema_overrides=overrides,
        )
        assert df.dtypes == [pl.Int64, pl.Int64, pl.Int32]
        assert df.to_dict(as_series=False) == {
            "a": [1, 2, 3],
            "b": [4, 5, 6],
            "c": [None, None, None],
        }

    # provide data that resolves to an empty frame (ref: scalar
    # expansion shortcut), with schema/override hints
    schema = {"colx": pl.String, "coly": pl.Int32}

    # the hint should behave the same passed as either keyword argument
    for param in ("schema", "schema_overrides"):
        df = pl.DataFrame({"colx": [], "coly": 0}, **{param: schema})  # type: ignore[arg-type]
        assert df.schema == schema
def test_nested_read_dicts_4143() -> None:
    """Nested list-of-struct values (with nulls) round-trip (issue 4143)."""
    result = pl.from_dicts(
        [
            {
                "id": 1,
                "hint": [
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                ],
            },
            {
                "id": 2,
                "hint": [
                    {"some_text_here": None, "list_": [1]},
                    {"some_text_here": None, "list_": [2]},
                ],
            },
        ]
    )
    # NOTE: dict equality ignores key order, so "hint" before "id" is fine
    expected = {
        "hint": [
            [
                {"some_text_here": "text", "list_": [1, 2, 4]},
                {"some_text_here": "text", "list_": [1, 2, 4]},
            ],
            [
                {"some_text_here": None, "list_": [1]},
                {"some_text_here": None, "list_": [2]},
            ],
        ],
        "id": [1, 2],
    }
    assert result.to_dict(as_series=False) == expected
def test_nested_read_dicts_4143_2() -> None:
    """List-of-struct with empty inner lists still infers a concrete dtype (issue 4143)."""
    result = pl.from_dicts(
        [
            {
                "id": 1,
                "hint": [
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                ],
            },
            {
                "id": 2,
                "hint": [
                    {"some_text_here": "text", "list_": []},
                    {"some_text_here": "text", "list_": []},
                ],
            },
        ]
    )

    # inner list dtype comes from the first row; empty lists must not degrade it
    assert result.dtypes == [
        pl.Int64,
        pl.List(pl.Struct({"some_text_here": pl.String, "list_": pl.List(pl.Int64)})),
    ]
    expected = {
        "id": [1, 2],
        "hint": [
            [
                {"some_text_here": "text", "list_": [1, 2, 4]},
                {"some_text_here": "text", "list_": [1, 2, 4]},
            ],
            [
                {"some_text_here": "text", "list_": []},
                {"some_text_here": "text", "list_": []},
            ],
        ],
    }
    assert result.to_dict(as_series=False) == expected
def test_from_records_nullable_structs() -> None:
    """Struct fields with null values load correctly, with or without a schema."""
    records = [
        {"id": 1, "items": [{"item_id": 100, "description": None}]},
        {"id": 1, "items": [{"item_id": 100, "description": "hi"}]},
    ]

    schema: list[tuple[str, PolarsDataType]] = [
        ("id", pl.UInt16),
        (
            "items",
            pl.List(
                pl.Struct(
                    [pl.Field("item_id", pl.UInt32), pl.Field("description", pl.String)]
                )
            ),
        ),
    ]

    # explicit schema and full inference must produce the same values
    schema_options: list[list[tuple[str, PolarsDataType]] | None] = [schema, None]
    for s in schema_options:
        result = pl.DataFrame(records, schema=s, orient="row")
        expected = {
            "id": [1, 1],
            "items": [
                [{"item_id": 100, "description": None}],
                [{"item_id": 100, "description": "hi"}],
            ],
        }
        assert result.to_dict(as_series=False) == expected

    # check initialisation without any records
    df = pl.DataFrame(schema=schema)
    dict_schema = dict(schema)
    assert df.to_dict(as_series=False) == {"id": [], "items": []}
    assert df.schema == dict_schema

    # an empty series with the same nested dtype behaves consistently
    dtype: PolarsDataType = dict_schema["items"]
    series = pl.Series("items", dtype=dtype)
    assert series.to_frame().to_dict(as_series=False) == {"items": []}
    assert series.dtype == dict_schema["items"]
    assert series.to_list() == []
@pytest.mark.parametrize("unnest_column", ["a", pl.col("a"), cs.by_name("a")])
def test_from_categorical_in_struct_defined_by_schema(unnest_column: Any) -> None:
    """Categorical struct fields declared via schema unnest correctly."""
    df = pl.DataFrame(
        {"a": [{"value": "foo", "counts": 1}, {"value": "bar", "counts": 2}]},
        schema={"a": pl.Struct({"value": pl.Categorical, "counts": pl.UInt32})},
    )
    expected = pl.DataFrame(
        {"value": ["foo", "bar"], "counts": [1, 2]},
        schema={"value": pl.Categorical, "counts": pl.UInt32},
    )

    # eager and lazy unnesting must agree
    for result in (
        df.unnest(unnest_column),
        df.lazy().unnest(unnest_column).collect(),
    ):
        assert_frame_equal(result, expected, categorical_as_str=True)
def test_nested_schema_construction() -> None:
    """Dict input honours a deeply nested schema; absent fields become null."""
    schema = {
        "node_groups": pl.List(
            pl.Struct(
                [
                    pl.Field("parent_node_group_id", pl.UInt8),
                    pl.Field(
                        "nodes",
                        pl.List(
                            pl.Struct(
                                [
                                    pl.Field("name", pl.String),
                                    pl.Field(
                                        "sub_nodes",
                                        pl.List(
                                            pl.Struct(
                                                [
                                                    pl.Field("internal_id", pl.UInt64),
                                                    pl.Field("value", pl.UInt32),
                                                ]
                                            )
                                        ),
                                    ),
                                ]
                            )
                        ),
                    ),
                ]
            )
        )
    }
    df = pl.DataFrame(
        {
            "node_groups": [
                [{"nodes": []}, {"nodes": [{"name": "", "sub_nodes": []}]}],
            ]
        },
        schema=schema,
    )

    assert df.schema == schema
    # "parent_node_group_id" was never supplied, so it is filled with null
    assert df.to_dict(as_series=False) == {
        "node_groups": [
            [
                {"parent_node_group_id": None, "nodes": []},
                {
                    "parent_node_group_id": None,
                    "nodes": [{"name": "", "sub_nodes": []}],
                },
            ]
        ]
    }
def test_nested_schema_construction2() -> None:
    """Row-oriented input honours a nested list-of-struct schema, incl. empty lists."""
    schema = {
        "node_groups": pl.List(
            pl.Struct(
                [
                    pl.Field(
                        "nodes",
                        pl.List(
                            pl.Struct(
                                [
                                    pl.Field("name", pl.String),
                                    pl.Field("time", pl.UInt32),
                                ]
                            )
                        ),
                    )
                ]
            )
        )
    }
    df = pl.DataFrame(
        [
            {"node_groups": [{"nodes": [{"name": "a", "time": 0}]}]},
            {"node_groups": [{"nodes": []}]},
        ],
        schema=schema,
    )
    assert df.schema == schema
    assert df.to_dict(as_series=False) == {
        "node_groups": [[{"nodes": [{"name": "a", "time": 0}]}], [{"nodes": []}]]
    }
def test_arrow_to_pyseries_with_one_chunk_does_not_copy_data() -> None:
    """A single-chunk arrow array should be adopted without copying its buffer."""
    from polars._utils.construction import arrow_to_pyseries

    source = pa.chunked_array([[1, 2, 3]], type=pa.int64())
    pyseries = arrow_to_pyseries("", source)

    # identical buffer addresses prove zero-copy construction
    polars_addr = pyseries.get_chunks()[0]._get_buffer_info()[0]
    arrow_addr = source.chunks[0].buffers()[1].address
    assert polars_addr == arrow_addr
def test_init_with_explicit_binary_schema() -> None:
    """Binary dtype can be declared explicitly for both frames and series."""
    values = [b"hello", b"world"]

    df = pl.DataFrame({"a": values}, schema={"a": pl.Binary})
    assert df.schema == {"a": pl.Binary}
    assert df["a"].to_list() == values

    s = pl.Series("a", values, dtype=pl.Binary)
    assert s.dtype == pl.Binary
    assert s.to_list() == values
def test_nested_categorical() -> None:
    """Categorical works as the inner type of a List."""
    dtype = pl.List(pl.Categorical)
    s = pl.Series([["a"]], dtype=dtype)
    assert s.dtype == dtype
    assert s.to_list() == [["a"]]
def test_datetime_date_subclasses() -> None:
    """Subclasses of date/datetime coerce to the plain temporal types."""

    class FakeDate(date): ...

    class FakeDateChild(FakeDate): ...

    class FakeDatetime(FakeDate, datetime): ...

    cases = [
        (FakeDate(2020, 1, 1), date(2020, 1, 1)),
        (FakeDateChild(2020, 1, 1), date(2020, 1, 1)),
        (FakeDatetime(2020, 1, 1, 3), datetime(2020, 1, 1, 3)),
    ]
    for subclassed, plain in cases:
        assert_series_equal(pl.Series([subclassed]), pl.Series([plain]))
def test_list_null_constructor() -> None:
    """List construction handles all-null values and nested empty lists."""
    s = pl.Series("a", [[None], [None]], dtype=pl.List(pl.Null))
    assert s.dtype == pl.List(pl.Null)
    assert s.to_list() == [[None], [None]]

    # nested lists; element order influences inference, so check both orders
    nested_dtype = pl.List(pl.List(pl.Int8))
    for values in (
        [
            [],
            [[], []],
            [[33, 112]],
        ],
        [
            [[], []],
            [],
            [[33, 112]],
        ],
    ):
        s = pl.Series(name="colx", values=values, dtype=nested_dtype)
        assert s.dtype == nested_dtype
        assert s.to_list() == values
def test_numpy_float_construction_av() -> None:
    """A numpy float scalar constructs the same frame as a python float."""
    result = pl.DataFrame({"a": np.float64(1)})
    assert_frame_equal(result, pl.DataFrame({"a": 1.0}))
def test_df_init_dict_raise_on_expression_input() -> None:
    """Expr values in a dict constructor raise; a list of Exprs becomes Object."""
    with pytest.raises(
        TypeError,
        match="passing Expr objects to the DataFrame constructor is not supported",
    ):
        pl.DataFrame({"a": pl.int_range(0, 3)})

    with pytest.raises(TypeError):
        pl.DataFrame({"a": pl.int_range(0, 3), "b": [3, 4, 5]})

    # Passing a list of expressions is allowed
    result = pl.DataFrame({"a": [pl.int_range(0, 3)]})
    assert result.get_column("a").dtype.is_object()
def test_df_schema_sequences() -> None:
    """A schema given as two-item lists (rather than tuples) is accepted."""
    schema = [
        ["address", pl.String],
        ["key", pl.Int64],
        ["value", pl.Float32],
    ]
    result = pl.DataFrame(schema=schema)  # type: ignore[arg-type]
    assert result.schema == {
        "address": pl.String,
        "key": pl.Int64,
        "value": pl.Float32,
    }
def test_df_schema_sequences_incorrect_length() -> None:
    """Schema entries with more than two items are rejected."""
    bad_schema = [
        ["address", pl.String, pl.Int8],  # three items: invalid
        ["key", pl.Int64],
        ["value", pl.Float32],
    ]
    with pytest.raises(ValueError):
        pl.DataFrame(schema=bad_schema)  # type: ignore[arg-type]
@pytest.mark.parametrize(
    ("input", "infer_func", "expected_dtype"),
    [
        ("f8", numpy_char_code_to_dtype, pl.Float64),
        ("f4", numpy_char_code_to_dtype, pl.Float32),
        ("f2", numpy_char_code_to_dtype, pl.Float16),
        ("i4", numpy_char_code_to_dtype, pl.Int32),
        ("u1", numpy_char_code_to_dtype, pl.UInt8),
        ("?", numpy_char_code_to_dtype, pl.Boolean),
        ("m8", numpy_char_code_to_dtype, pl.Duration("us")),
        ("M8", numpy_char_code_to_dtype, pl.Datetime("us")),
    ],
)
def test_numpy_inference(
    input: Any,
    infer_func: Callable[[Any], PolarsDataType],
    expected_dtype: PolarsDataType,
) -> None:
    """Numpy char codes map to the expected polars dtypes."""
    assert infer_func(input) == expected_dtype
def test_array_construction() -> None:
    """Array dtype construction: series (with nulls), empty schema, from_dicts."""
    # fixed-size array series, including a null element and differing inner types
    for dtype, payload in (
        (pl.Array(pl.Int64, 3), [[1, 2, 3], None, [4, 2, 3]]),
        (pl.Array(pl.UInt8, 2), [[1, 2], None, [3, 4]]),
    ):
        s = pl.Series(payload, dtype=dtype)
        assert s.dtype == dtype
        assert s.to_list() == payload

    # create using schema
    df = pl.DataFrame(
        schema={
            "a": pl.Array(pl.Float32, 3),
            "b": pl.Array(pl.Datetime("ms"), 5),
        }
    )
    assert df.dtypes == [
        pl.Array(pl.Float32, 3),
        pl.Array(pl.Datetime("ms"), 5),
    ]
    assert df.rows() == []

    # from dicts
    rows = [
        {"row_id": "a", "data": [1, 2, 3]},
        {"row_id": "b", "data": [2, 3, 4]},
    ]
    schema = {"row_id": pl.String(), "data": pl.Array(inner=pl.Int64, shape=3)}
    df = pl.from_dicts(rows, schema=schema)
    assert df.schema == schema
    assert df.rows() == [("a", [1, 2, 3]), ("b", [2, 3, 4])]
@pytest.mark.may_fail_auto_streaming
def test_pycapsule_interface(df: pl.DataFrame) -> None:
    """Round-trip frames/series through the Arrow PyCapsule C interfaces.

    Exercises the C data interface (arrays, record batches) and the C
    stream interface (chunked arrays, tables, readers), including empty
    inputs in each case.
    """
    df = df.rechunk()
    pyarrow_table = df.to_arrow()

    # Array via C data interface
    pyarrow_array = pyarrow_table["bools"].chunk(0)
    round_trip_series = pl.Series(PyCapsuleArrayHolder(pyarrow_array))
    assert df["bools"].equals(round_trip_series, check_dtypes=True, check_names=False)

    # empty Array via C data interface
    empty_pyarrow_array = pa.array([], type=pyarrow_array.type)
    round_trip_series = pl.Series(PyCapsuleArrayHolder(empty_pyarrow_array))
    assert df["bools"].dtype == round_trip_series.dtype

    # RecordBatch via C array interface
    pyarrow_record_batch = pyarrow_table.to_batches()[0]
    round_trip_df = pl.DataFrame(PyCapsuleArrayHolder(pyarrow_record_batch))
    assert df.equals(round_trip_df)

    # ChunkedArray via C stream interface
    pyarrow_chunked_array = pyarrow_table["bools"]
    round_trip_series = pl.Series(PyCapsuleStreamHolder(pyarrow_chunked_array))
    assert df["bools"].equals(round_trip_series, check_dtypes=True, check_names=False)

    # empty ChunkedArray via C stream interface
    empty_chunked_array = pa.chunked_array([], type=pyarrow_chunked_array.type)
    round_trip_series = pl.Series(PyCapsuleStreamHolder(empty_chunked_array))
    assert df["bools"].dtype == round_trip_series.dtype

    # Table via C stream interface
    round_trip_df = pl.DataFrame(PyCapsuleStreamHolder(pyarrow_table))
    assert df.equals(round_trip_df)

    # empty Table via C stream interface
    empty_df = df[:0].to_arrow()
    round_trip_df = pl.DataFrame(PyCapsuleStreamHolder(empty_df))
    orig_schema = df.schema
    round_trip_schema = round_trip_df.schema

    # The "enum" schema is not preserved because categories are lost via C data
    # interface
    orig_schema.pop("enum")
    round_trip_schema.pop("enum")

    assert orig_schema == round_trip_schema

    # RecordBatchReader via C stream interface
    pyarrow_reader = pa.RecordBatchReader.from_batches(
        pyarrow_table.schema, pyarrow_table.to_batches()
    )
    round_trip_df = pl.DataFrame(PyCapsuleStreamHolder(pyarrow_reader))
    assert df.equals(round_trip_df)
@pytest.mark.parametrize(
    "tz",
    [
        None,
        ZoneInfo("Asia/Tokyo"),
        ZoneInfo("Europe/Amsterdam"),
        ZoneInfo("UTC"),
        timezone.utc,
    ],
)
def test_init_list_of_dicts_with_timezone(tz: Any) -> None:
    """Tz-aware datetimes in dict rows keep their timezone in the schema."""
    moment = datetime(2023, 1, 1, 0, 0, 0, 0, tzinfo=tz)

    result = pl.DataFrame([{"dt": moment}, {"dt": moment}])
    assert_frame_equal(result, pl.DataFrame({"dt": [moment, moment]}))

    assert result.schema == {"dt": pl.Datetime("us", time_zone=tz)}
@pytest.mark.parametrize(
    "tz",
    [
        None,
        ZoneInfo("Asia/Tokyo"),
        ZoneInfo("Europe/Amsterdam"),
        ZoneInfo("UTC"),
        timezone.utc,
    ],
)
def test_init_list_of_nested_dicts_with_timezone(tz: Any) -> None:
    """Timezones survive inside nested (struct) dict values."""
    moment = datetime(2021, 1, 1, 0, 0, 0, 0, tzinfo=tz)

    result = pl.DataFrame([{"timestamp": {"content": moment}}]).unnest("timestamp")
    assert_frame_equal(result, pl.DataFrame({"content": [moment]}))

    assert result.schema == {"content": pl.Datetime("us", time_zone=tz)}
def test_init_from_subclassed_types() -> None:
    """Instances of subclassed builtin types construct like their base type."""
    # more detailed test of one custom subclass...
    import codecs

    class SuperSecretString(str):
        def __new__(cls, value: str) -> Self:
            return super().__new__(cls, value)

        def __repr__(self) -> str:
            # rot13 the repr, so the subclass is observably different from str
            return codecs.encode(self, "rot_13")

    w = "windmolen"
    sstr = SuperSecretString(w)

    assert sstr == w
    assert isinstance(sstr, str)
    assert repr(sstr) == "jvaqzbyra"
    assert_series_equal(pl.Series([w, w]), pl.Series([sstr, sstr]))

    # ...then validate across other basic types
    for BaseType, value in (
        (int, 42),
        (float, 5.5),
        (bytes, b"value"),
        (str, "value"),
    ):

        class SubclassedType(BaseType):  # type: ignore[misc,valid-type]
            def __new__(cls, value: Any) -> Self:
                return super().__new__(cls, value)  # type: ignore[no-any-return]

        assert (
            pl.Series([value]).to_list() == pl.Series([SubclassedType(value)]).to_list()
        )
def test_series_init_with_python_type_7737() -> None:
    """Plain python types map to default dtypes; mismatched data raises (issue 7737)."""
    for py_type, expected_dtype in (
        (int, pl.Int64),
        (float, pl.Float64),
        (bool, pl.Boolean),
        (str, pl.Utf8),
    ):
        assert pl.Series([], dtype=py_type).dtype == expected_dtype  # type: ignore[arg-type]

    with pytest.raises(TypeError):
        pl.Series(["a"], dtype=int)  # type: ignore[arg-type]

    with pytest.raises(TypeError):
        pl.Series([True], dtype=str)  # type: ignore[arg-type]
def test_init_from_list_shape_6968() -> None:
    """Regression test (issue 6968): null-led rows keep the inferred shape."""
    for rows in (
        [[1, None], [2, None], [3, None]],
        [[None, None], [2, None], [3, None]],
    ):
        assert pl.DataFrame(rows).shape == (2, 3)
def test_dataframe_height() -> None:
    """The `height` parameter sets, or validates, frame height at construction."""
    # height alone builds a zero-width frame of that many rows
    assert pl.DataFrame(height=10).shape == (10, 0)
    assert pl.DataFrame(pl.DataFrame(height=10)).shape == (10, 0)

    # a matching height is accepted and changes nothing
    assert_frame_equal(
        pl.DataFrame({"a": [0, 1, 2]}, height=3), pl.DataFrame({"a": [0, 1, 2]})
    )

    # any disagreement between data height and the declared height raises
    with pytest.raises(
        pl.exceptions.ShapeError,
        match=r"height of data \(3\) does not match specified height \(99\)",
    ):
        pl.DataFrame({"a": [0, 1, 2]}, height=99)

    with pytest.raises(
        pl.exceptions.ShapeError,
        match=r"height of data \(3\) does not match specified height \(0\)",
    ):
        pl.DataFrame({"a": [0, 1, 2]}, height=0)

    with pytest.raises(
        pl.exceptions.ShapeError,
        match=r"height of data \(10\) does not match specified height \(5\)",
    ):
        pl.DataFrame(pl.DataFrame(height=10), height=5)

    # zero-width frames still compare by height
    assert_frame_equal(pl.DataFrame(height=10), pl.DataFrame(height=10))

    with pytest.raises(AssertionError):
        assert_frame_equal(pl.DataFrame(height=5), pl.DataFrame(height=10))

    with pytest.raises(AssertionError):
        assert_frame_equal(pl.DataFrame(), pl.DataFrame(height=10))