CoCalc -- test_constructors.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_constructors.py
6939 views
1
from __future__ import annotations
2

3
from collections import OrderedDict, namedtuple
4
from datetime import date, datetime, time, timedelta, timezone
5
from decimal import Decimal
6
from random import shuffle
7
from typing import TYPE_CHECKING, Any, Literal, NamedTuple
8
from zoneinfo import ZoneInfo
9

10
import numpy as np
11
import pandas as pd
12
import pyarrow as pa
13
import pytest
14
from packaging.version import parse as parse_version
15
from pydantic import BaseModel, Field, TypeAdapter
16

17
import polars as pl
18
import polars.selectors as cs
19
from polars._utils.construction.utils import try_get_type_hints
20
from polars.datatypes import numpy_char_code_to_dtype
21
from polars.dependencies import dataclasses, pydantic
22
from polars.exceptions import DuplicateError, ShapeError
23
from polars.testing import assert_frame_equal, assert_series_equal
24
from tests.unit.utils.pycapsule_utils import PyCapsuleArrayHolder, PyCapsuleStreamHolder
25

26
if TYPE_CHECKING:
27
    import sys
28
    from collections.abc import Callable
29

30
    from polars._typing import PolarsDataType
31

32
    if sys.version_info >= (3, 11):
33
        from typing import Self
34
    else:
35
        from typing_extensions import Self
36

37
    from typing_extensions import assert_type
38

39

40
# -----------------------------------------------------------------------------------
# nested dataclasses, models, namedtuple classes (can't be defined inside test func)
# -----------------------------------------------------------------------------------
43
@dataclasses.dataclass
class _TestBazDC:
    """Innermost dataclass fixture; used as the type of ``_TestBarDC.c``."""

    d: datetime
    e: float
    f: str
48

49

50
@dataclasses.dataclass
class _TestBarDC:
    """Middle dataclass fixture; nests a ``_TestBazDC`` in field ``c``."""

    a: str
    b: int
    c: _TestBazDC
55

56

57
@dataclasses.dataclass
class _TestFooDC:
    """Outermost dataclass fixture; nests a ``_TestBarDC`` in field ``y``."""

    x: int
    y: _TestBarDC
61

62

63
class _TestBazPD(pydantic.BaseModel):
    """Innermost pydantic fixture; used as the type of ``_TestBarPD.c``."""

    d: datetime
    e: float
    f: str
67

68

69
class _TestBarPD(pydantic.BaseModel):
    """Middle pydantic fixture; nests a ``_TestBazPD`` in field ``c``."""

    a: str
    b: int
    c: _TestBazPD
73

74

75
class _TestFooPD(pydantic.BaseModel):
    """Outermost pydantic fixture; nests a ``_TestBarPD`` in field ``y``."""

    x: int
    y: _TestBarPD
78

79

80
class _TestBazNT(NamedTuple):
    """Innermost namedtuple fixture; used as the type of ``_TestBarNT.c``."""

    d: datetime
    e: float
    f: str
84

85

86
class _TestBarNT(NamedTuple):
    """Middle namedtuple fixture; nests a ``_TestBazNT`` in field ``c``."""

    a: str
    b: int
    c: _TestBazNT
90

91

92
class _TestFooNT(NamedTuple):
    """Outermost namedtuple fixture; nests a ``_TestBarNT`` in field ``y``."""

    x: int
    y: _TestBarNT
95

96

97
# --------------------------------------------------------------------------------
98

99

100
def test_init_dict() -> None:
    """Exercise ``pl.DataFrame`` construction from dict input.

    Covers empty dicts and empty values, inferred and explicit schemas,
    temporal columns built from Python objects and from integers, renaming
    and retyping columns through ``schema``, and ``pl.Object`` columns that
    hold nested dicts with empty sub-values.
    """
    # Empty dictionary
    df = pl.DataFrame({})
    assert df.shape == (0, 0)

    # Empty dictionary/values
    df = pl.DataFrame({"a": [], "b": []})
    assert df.shape == (0, 2)
    assert df.schema == {"a": pl.Null, "b": pl.Null}

    for df in (
        pl.DataFrame({}, schema={"a": pl.Date, "b": pl.String}),
        pl.DataFrame({"a": [], "b": []}, schema={"a": pl.Date, "b": pl.String}),
    ):
        assert df.shape == (0, 2)
        assert df.schema == {"a": pl.Date, "b": pl.String}

    # List of empty list
    df = pl.DataFrame({"a": [[]], "b": [[]]})
    expected = {"a": pl.List(pl.Null), "b": pl.List(pl.Null)}
    assert df.schema == expected
    assert df.rows() == [([], [])]

    # Mixed dtypes
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    assert df.shape == (3, 2)
    assert df.columns == ["a", "b"]
    assert df.dtypes == [pl.Int64, pl.Float64]

    df = pl.DataFrame(
        data={"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]},
        schema=[("a", pl.Int8), ("b", pl.Float32)],
    )
    assert df.schema == {"a": pl.Int8, "b": pl.Float32}

    # Values contained in tuples
    df = pl.DataFrame({"a": (1, 2, 3), "b": [1.0, 2.0, 3.0]})
    assert df.shape == (3, 2)

    # Datetime/Date types (from both python and integer values)
    py_datetimes = (
        datetime(2022, 12, 31, 23, 59, 59),
        datetime(2022, 12, 31, 23, 59, 59),
    )
    py_dates = (date(2022, 12, 31), date(2022, 12, 31))
    int_datetimes = [1672531199000000, 1672531199000000]
    int_dates = [19357, 19357]

    for dates, datetimes, coldefs in (
        # test inferred and explicit (given both py/polars dtypes)
        (py_dates, py_datetimes, None),
        (py_dates, py_datetimes, [("dt", date), ("dtm", datetime)]),
        (py_dates, py_datetimes, [("dt", pl.Date), ("dtm", pl.Datetime)]),
        (int_dates, int_datetimes, [("dt", date), ("dtm", datetime)]),
        (int_dates, int_datetimes, [("dt", pl.Date), ("dtm", pl.Datetime)]),
    ):
        df = pl.DataFrame(
            data={"dt": dates, "dtm": datetimes},
            schema=coldefs,
        )
        # every variant must yield the same schema and the same python values
        assert df.schema == {"dt": pl.Date, "dtm": pl.Datetime("us")}
        assert df.rows() == list(zip(py_dates, py_datetimes))

    # Overriding dict column names/types
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, schema=["c", "d"])
    assert df.columns == ["c", "d"]

    df = pl.DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6]},
        schema=["c", ("d", pl.Int8)],
    )  # partial type info (allowed, but mypy doesn't like it ;p)
    assert df.schema == {"c": pl.Int64, "d": pl.Int8}

    df = pl.DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6]}, schema=[("c", pl.Int8), ("d", pl.Int16)]
    )
    assert df.schema == {"c": pl.Int8, "d": pl.Int16}

    # empty nested objects
    for empty_val in [None, "", {}, []]:  # type: ignore[var-annotated]
        test = [{"field": {"sub_field": empty_val, "sub_field_2": 2}}]
        df = pl.DataFrame(test, schema={"field": pl.Object})
        assert df["field"][0] == test[0]["field"]
183

184

185
def test_error_string_dtypes() -> None:
    """String dtype names (e.g. ``"i16"``, ``"f32"``) must raise ``TypeError``.

    Checked for both a DataFrame ``schema`` mapping and a Series ``dtype``.
    """
    with pytest.raises(TypeError, match="cannot parse input"):
        pl.DataFrame(
            data={"x": [1, 2], "y": [3, 4], "z": [5, 6]},
            schema={"x": "i16", "y": "i32", "z": "f32"},  # type: ignore[dict-item]
        )

    with pytest.raises(TypeError, match="cannot parse input"):
        pl.Series("n", [1, 2, 3], dtype="f32")  # type: ignore[arg-type]
194

195

196
def test_init_structured_objects() -> None:
    """Build frames from lists of dataclass, namedtuple and pydantic objects.

    For each record class the same raw rows are loaded through both
    ``pl.DataFrame`` and ``pl.from_records``; the inferred schema, a partial
    ``schema_overrides`` and a full renaming ``schema`` are all checked.
    """
    # validate init from dataclass, namedtuple, and pydantic model objects
    @dataclasses.dataclass
    class TradeDC:
        timestamp: datetime
        ticker: str
        price: Decimal
        size: int | None = None

    class TradePD(pydantic.BaseModel):
        timestamp: datetime
        ticker: str
        price: Decimal
        size: int

    class TradeNT(NamedTuple):
        timestamp: datetime
        ticker: str
        price: Decimal
        size: int | None = None

    raw_data = [
        (datetime(2022, 9, 8, 14, 30, 45), "AAPL", Decimal("157.5"), 125),
        (datetime(2022, 9, 9, 10, 15, 12), "FLSY", Decimal("10.0"), 1500),
        (datetime(2022, 9, 7, 15, 30), "MU", Decimal("55.5"), 400),
    ]
    columns = ["timestamp", "ticker", "price", "size"]

    for TradeClass in (TradeDC, TradeNT, TradePD):
        trades = [TradeClass(**dict(zip(columns, values))) for values in raw_data]  # type: ignore[arg-type]

        for DF in (pl.DataFrame, pl.from_records):
            df = DF(data=trades)
            assert df.schema == {
                "timestamp": pl.Datetime("us"),
                "ticker": pl.String,
                "price": pl.Decimal(scale=1),
                "size": pl.Int64,
            }
            assert df.rows() == raw_data

            # partial dtypes override
            df = DF(
                data=trades,
                schema_overrides={"timestamp": pl.Datetime("ms"), "size": pl.Int32},
            )
            assert df.schema == {
                "timestamp": pl.Datetime("ms"),
                "ticker": pl.String,
                "price": pl.Decimal(scale=1),
                "size": pl.Int32,
            }

        # in conjunction with full 'columns' override (rename/downcast)
        df = pl.DataFrame(
            data=trades,
            schema=[
                ("ts", pl.Datetime("ms")),
                ("tk", pl.Categorical),
                ("pc", pl.Decimal(scale=1)),
                ("sz", pl.UInt16),
            ],
        )
        assert df.schema == {
            "ts": pl.Datetime("ms"),
            "tk": pl.Categorical(ordering="lexical"),
            "pc": pl.Decimal(scale=1),
            "sz": pl.UInt16,
        }
        assert df.rows() == raw_data

        # cover a miscellaneous edge-case when detecting the annotations
        assert try_get_type_hints(obj=type(None)) == {}
269

270

271
def test_init_pydantic_2x() -> None:
    """Build a frame from pydantic models validated out of a JSON payload.

    The model declares aliases and defaults on several fields; the expected
    frame below pins the values each column ends up with.
    """

    class PageView(BaseModel):
        user_id: str
        ts: datetime = Field(alias=["ts", "$date"])  # type: ignore[literal-required, call-overload]
        path: str = Field("?", alias=["url", "path"])  # type: ignore[literal-required, call-overload]
        referer: str = Field("?", alias="referer")
        event: Literal["leave", "enter"] = Field("enter")
        time_on_page: int = Field(0, serialization_alias="top")

    data_json = """
    [{
        "user_id": "x",
        "ts": {"$date": "2021-01-01T00:00:00.000Z"},
        "url": "/latest/foobar",
        "referer": "https://google.com",
        "event": "enter",
        "top": 123
    }]
    """
    adapter: TypeAdapter[Any] = TypeAdapter(list[PageView])
    models = adapter.validate_json(data_json)

    result = pl.DataFrame(models)
    expected = pl.DataFrame(
        {
            "user_id": ["x"],
            "ts": [datetime(2021, 1, 1, 0, 0)],
            "path": ["?"],
            "referer": ["https://google.com"],
            "event": ["enter"],
            "time_on_page": [0],
        }
    )
    assert_frame_equal(result, expected)
305

306

307
def test_init_structured_objects_unhashable() -> None:
    """Namedtuple rows holding a ``dict`` field load as a struct column."""
    # cover an edge-case with namedtuple fields that aren't hashable

    class Test(NamedTuple):
        dt: datetime
        info: dict[str, int]

    test_data = [
        Test(datetime(2017, 1, 1), {"a": 1, "b": 2}),
        Test(datetime(2017, 1, 2), {"a": 2, "b": 2}),
    ]
    df = pl.DataFrame(test_data)
    # shape: (2, 2)
    # ┌─────────────────────┬───────────┐
    # │ dt                  ┆ info      │
    # │ ---                 ┆ ---       │
    # │ datetime[μs]        ┆ struct[2] │
    # ╞═════════════════════╪═══════════╡
    # │ 2017-01-01 00:00:00 ┆ {1,2}     │
    # │ 2017-01-02 00:00:00 ┆ {2,2}     │
    # └─────────────────────┴───────────┘
    assert df.schema == {
        "dt": pl.Datetime(time_unit="us", time_zone=None),
        "info": pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Int64)]),
    }
    assert df.rows() == test_data
333

334

335
@pytest.mark.parametrize(
    ("foo", "bar", "baz"),
    [
        (_TestFooDC, _TestBarDC, _TestBazDC),
        (_TestFooPD, _TestBarPD, _TestBazPD),
        (_TestFooNT, _TestBarNT, _TestBazNT),
    ],
)
def test_init_structured_objects_nested(foo: Any, bar: Any, baz: Any) -> None:
    """Load a three-level nested object and check the resulting struct schema.

    Parametrized over the module-level dataclass, pydantic and namedtuple
    fixture classes. A nested struct dtype is also supplied through both
    ``schema`` and ``schema_overrides`` to check it reaches the inner fields.
    """
    data = [
        foo(
            x=100,
            y=bar(
                a="hello",
                b=800,
                c=baz(d=datetime(2023, 4, 12, 10, 30), e=-10.5, f="world"),
            ),
        )
    ]
    df = pl.DataFrame(data)
    # shape: (1, 2)
    # ┌─────┬───────────────────────────────────┐
    # │ x   ┆ y                                 │
    # │ --- ┆ ---                               │
    # │ i64 ┆ struct[3]                         │
    # ╞═════╪═══════════════════════════════════╡
    # │ 100 ┆ {"hello",800,{2023-04-12 10:30:0… │
    # └─────┴───────────────────────────────────┘

    assert df.schema == {
        "x": pl.Int64,
        "y": pl.Struct(
            [
                pl.Field("a", pl.String),
                pl.Field("b", pl.Int64),
                pl.Field(
                    "c",
                    pl.Struct(
                        [
                            pl.Field("d", pl.Datetime("us")),
                            pl.Field("e", pl.Float64),
                            pl.Field("f", pl.String),
                        ]
                    ),
                ),
            ]
        ),
    }
    assert df.row(0) == (
        100,
        {
            "a": "hello",
            "b": 800,
            "c": {
                "d": datetime(2023, 4, 12, 10, 30),
                "e": -10.5,
                "f": "world",
            },
        },
    )

    # validate nested schema override
    override_struct_schema: dict[str, PolarsDataType] = {
        "x": pl.Int16,
        "y": pl.Struct(
            [
                pl.Field("a", pl.String),
                pl.Field("b", pl.Int32),
                pl.Field(
                    name="c",
                    dtype=pl.Struct(
                        [
                            pl.Field("d", pl.Datetime("ms")),
                            pl.Field("e", pl.Float32),
                            pl.Field("f", pl.String),
                        ]
                    ),
                ),
            ]
        ),
    }
    # the same dtype mapping is passed once as overrides and once as the schema
    for schema, schema_overrides in (
        (None, override_struct_schema),
        (override_struct_schema, None),
    ):
        df = (
            pl.DataFrame(data, schema=schema, schema_overrides=schema_overrides)
            .unnest("y")
            .unnest("c")
        )
        # shape: (1, 6)
        # ┌─────┬───────┬─────┬─────────────────────┬───────┬───────┐
        # │ x   ┆ a     ┆ b   ┆ d                   ┆ e     ┆ f     │
        # │ --- ┆ ---   ┆ --- ┆ ---                 ┆ ---   ┆ ---   │
        # │ i16 ┆ str   ┆ i32 ┆ datetime[ms]        ┆ f32   ┆ str   │
        # ╞═════╪═══════╪═════╪═════════════════════╪═══════╪═══════╡
        # │ 100 ┆ hello ┆ 800 ┆ 2023-04-12 10:30:00 ┆ -10.5 ┆ world │
        # └─────┴───────┴─────┴─────────────────────┴───────┴───────┘
        assert df.schema == {
            "x": pl.Int16,
            "a": pl.String,
            "b": pl.Int32,
            "d": pl.Datetime("ms"),
            "e": pl.Float32,
            "f": pl.String,
        }
        assert df.row(0) == (
            100,
            "hello",
            800,
            datetime(2023, 4, 12, 10, 30),
            -10.5,
            "world",
        )
449

450

451
def test_dataclasses_initvar_typing() -> None:
    """A dataclass with an ``InitVar`` loads without the init-only field."""

    @dataclasses.dataclass
    class ABC:
        x: date
        y: float
        z: dataclasses.InitVar[list[str]] = None

    # should be able to parse the initvar typing...
    abc = ABC(x=date(1999, 12, 31), y=100.0)
    df = pl.DataFrame([abc])

    # ...but should not load the initvar field into the DataFrame
    assert dataclasses.asdict(abc) == df.rows(named=True)[0]
464

465

466
@pytest.mark.parametrize(
    "nt",
    [
        namedtuple("TestData", ["id", "info"]),  # noqa: PYI024
        NamedTuple("TestData", [("id", int), ("info", str)]),
    ],
)
def test_collections_namedtuple(nt: type) -> None:
    """Namedtuples load as rows at top level and as structs inside a dict.

    Parametrized over an untyped ``collections.namedtuple`` and a typed
    ``typing.NamedTuple`` with the same field names.
    """
    nt_data = [nt(1, "a"), nt(2, "b"), nt(3, "c")]

    # a list of namedtuples becomes one column per field
    result = pl.DataFrame(nt_data)
    expected = pl.DataFrame({"id": [1, 2, 3], "info": ["a", "b", "c"]})
    assert_frame_equal(result, expected)

    # as a dict value, the expected frame treats each namedtuple like a dict
    result = pl.DataFrame({"data": nt_data, "misc": ["x", "y", "z"]})
    expected = pl.DataFrame(
        {
            "data": [
                {"id": 1, "info": "a"},
                {"id": 2, "info": "b"},
                {"id": 3, "info": "c"},
            ],
            "misc": ["x", "y", "z"],
        }
    )
    assert_frame_equal(result, expected)
492

493

494
def test_init_ndarray() -> None:
    """Exercise ``pl.DataFrame`` construction from numpy arrays.

    Covers empty, 1D and 2D arrays, explicit and inferred orientation,
    schema-driven casting, 2D arrays stored as ``pl.Array`` columns, and a
    round trip through ``np.asarray``.
    """
    # Empty array
    df = pl.DataFrame(np.array([]))
    assert_frame_equal(df, pl.DataFrame())

    # 1D array
    df = pl.DataFrame(np.array([1, 2, 3], dtype=np.int64), schema=["a"])
    expected = pl.DataFrame({"a": [1, 2, 3]})
    assert_frame_equal(df, expected)

    df = pl.DataFrame(np.array([1, 2, 3]), schema=[("a", pl.Int32)])
    expected = pl.DataFrame({"a": [1, 2, 3]}).with_columns(pl.col("a").cast(pl.Int32))
    assert_frame_equal(df, expected)

    # 2D array (or 2x 1D array), explicit column orientation
    for data in (
        np.array([[1, 2], [3, 4]], dtype=np.int64),
        [np.array([1, 2], dtype=np.int64), np.array([3, 4], dtype=np.int64)],
    ):
        df = pl.DataFrame(data, orient="col")
        expected = pl.DataFrame({"column_0": [1, 2], "column_1": [3, 4]})
        assert_frame_equal(df, expected)

    df = pl.DataFrame([[1, 2.0, "a"], [None, None, None]], orient="row")
    expected = pl.DataFrame(
        {"column_0": [1, None], "column_1": [2.0, None], "column_2": ["a", None]}
    )
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        data=[[1, 2.0, "a"], [None, None, None]],
        schema=[("x", pl.Boolean), ("y", pl.Int32), "z"],
        orient="row",
    )
    assert df.rows() == [(True, 2, "a"), (None, None, None)]
    assert df.schema == {"x": pl.Boolean, "y": pl.Int32, "z": pl.String}

    # 2D array, no orient given - the expected frame treats the first axis as rows
    df = pl.DataFrame(np.array([[1, 2], [3, 4]], dtype=np.int64))
    expected = pl.DataFrame({"column_0": [1, 3], "column_1": [2, 4]})
    assert_frame_equal(df, expected)

    # no orientation, numpy convention
    df = pl.DataFrame(np.ones((3, 1), dtype=np.int64))
    assert df.shape == (3, 1)

    # 2D array - row orientation inferred
    df = pl.DataFrame(
        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64), schema=["a", "b", "c"]
    )
    expected = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
    assert_frame_equal(df, expected)

    # 2D array - column orientation inferred
    df = pl.DataFrame(
        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64), schema=["a", "b"]
    )
    expected = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    assert_frame_equal(df, expected)

    # List column from 2D array with single-column schema
    df = pl.DataFrame(np.arange(4).reshape(-1, 1).astype(np.int64), schema=["a"])
    assert_frame_equal(df, pl.DataFrame({"a": [0, 1, 2, 3]}))
    assert np.array_equal(df.to_numpy(), np.arange(4).reshape(-1, 1).astype(np.int64))

    df = pl.DataFrame(np.arange(4).reshape(-1, 2).astype(np.int64), schema=["a"])
    assert_frame_equal(
        df,
        pl.DataFrame(
            {"a": [[0, 1], [2, 3]]}, schema={"a": pl.Array(pl.Int64, shape=2)}
        ),
    )

    # 2D numpy arrays
    df = pl.DataFrame({"a": np.arange(5, dtype=np.int64).reshape(1, -1)})
    assert df.dtypes == [pl.Array(pl.Int64, shape=5)]
    assert df.shape == (1, 1)

    df = pl.DataFrame({"a": np.arange(10, dtype=np.int64).reshape(2, -1)})
    assert df.dtypes == [pl.Array(pl.Int64, shape=5)]
    assert df.shape == (2, 1)
    assert df.rows() == [([0, 1, 2, 3, 4],), ([5, 6, 7, 8, 9],)]

    test_rows = [(1, 2), (3, 4)]
    df = pl.DataFrame([np.array(test_rows[0]), np.array(test_rows[1])], orient="row")
    expected = pl.DataFrame(test_rows, orient="row")
    assert_frame_equal(df, expected)

    # round trip export/init
    for shape in ((4, 4), (4, 8), (8, 4)):
        np_ones = np.ones(shape=shape, dtype=np.float64)
        names = [f"c{i}" for i in range(shape[1])]

        df = pl.DataFrame(np_ones, schema=names)
        assert_frame_equal(df, pl.DataFrame(np.asarray(df), schema=names))
589

590

591
def test_init_ndarray_errors() -> None:
    """Invalid ndarray/schema/orient combinations must raise ``ValueError``."""
    # 2D array: orientation conflicts with columns
    with pytest.raises(ValueError):
        pl.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), schema=["a", "b"], orient="row")

    with pytest.raises(ValueError):
        pl.DataFrame(
            np.array([[1, 2, 3], [4, 5, 6]]),
            schema=[("a", pl.UInt32), ("b", pl.UInt32)],
            orient="row",
        )

    # Invalid orient value
    with pytest.raises(ValueError):
        pl.DataFrame(
            np.array([[1, 2, 3], [4, 5, 6]]),
            orient="wrong",  # type: ignore[arg-type]
        )

    # Dimensions mismatch
    with pytest.raises(ValueError):
        _ = pl.DataFrame(np.array([1, 2, 3]), schema=[])

    # Cannot init with 3D array
    with pytest.raises(ValueError):
        _ = pl.DataFrame(np.random.randn(2, 2, 2))
617

618

619
def test_init_ndarray_nan() -> None:
    """NaN in numpy input is kept by default and nulled with ``nan_to_null``.

    Checked for both ``pl.DataFrame`` and ``pl.Series``.
    """
    # numpy arrays containing NaN
    df0 = pl.DataFrame(
        data={"x": [1.0, 2.5, float("nan")], "y": [4.0, float("nan"), 6.5]},
    )
    df1 = pl.DataFrame(
        data={"x": np.array([1.0, 2.5, np.nan]), "y": np.array([4.0, np.nan, 6.5])},
    )
    df2 = pl.DataFrame(
        data={"x": np.array([1.0, 2.5, np.nan]), "y": np.array([4.0, np.nan, 6.5])},
        nan_to_null=True,
    )
    assert_frame_equal(df0, df1)
    assert df2.rows() == [(1.0, 4.0), (2.5, None), (None, 6.5)]

    s0 = pl.Series("n", [1.0, 2.5, float("nan")])
    s1 = pl.Series("n", np.array([1.0, 2.5, float("nan")]))
    s2 = pl.Series("n", np.array([1.0, 2.5, float("nan")]), nan_to_null=True)

    assert_series_equal(s0, s1)
    assert s2.to_list() == [1.0, 2.5, None]
640

641

642
def test_init_ndarray_square() -> None:
    """Pin how square 2D arrays are oriented, for C and Fortran memory order."""
    # 2D square array; ensure that we maintain convention
    # (first axis = rows) with/without an explicit schema
    arr = np.arange(4).reshape(2, 2)
    assert (
        [(0, 1), (2, 3)]
        == pl.DataFrame(arr).rows()
        == pl.DataFrame(arr, schema=["a", "b"]).rows()
    )
    # check that we tie-break square arrays using fortran vs c-contiguous row/col major
    df_c = pl.DataFrame(
        data=np.array([[1, 2], [3, 4]], dtype=np.int64, order="C"),
        schema=["x", "y"],
    )
    assert_frame_equal(df_c, pl.DataFrame({"x": [1, 3], "y": [2, 4]}))

    df_f = pl.DataFrame(
        data=np.array([[1, 2], [3, 4]], dtype=np.int64, order="F"),
        schema=["x", "y"],
    )
    assert_frame_equal(df_f, pl.DataFrame({"x": [1, 2], "y": [3, 4]}))
663

664

665
def test_init_numpy_unavailable(monkeypatch: Any) -> None:
    """With the numpy check patched to ``False``, ndarray input raises ``TypeError``."""
    # force the frame module's numpy detection to report "not numpy"
    monkeypatch.setattr(pl.dataframe.frame, "_check_for_numpy", lambda x: False)
    with pytest.raises(TypeError):
        pl.DataFrame(np.array([1, 2, 3]), schema=["a"])
669

670

671
def test_init_numpy_scalars() -> None:
    """Lists of numpy scalars keep their bool/int8/uint32 dtypes as columns."""
    df = pl.DataFrame(
        {
            "bool": [np.bool_(True), np.bool_(False)],
            "i8": [np.int8(16), np.int8(64)],
            "u32": [np.uint32(1234), np.uint32(9876)],
        }
    )
    df_expected = pl.from_records(
        data=[(True, 16, 1234), (False, 64, 9876)],
        schema=OrderedDict([("bool", pl.Boolean), ("i8", pl.Int8), ("u32", pl.UInt32)]),
        orient="row",
    )
    assert_frame_equal(df, df_expected)
685

686

687
def test_null_array_print_format() -> None:
    """An all-null arrow table loads as ``pl.Null`` and prints as ``null``."""
    pa_tbl_null = pa.table({"a": [None, None]})
    df_null = pl.from_arrow(pa_tbl_null)
    assert df_null.shape == (2, 1)
    assert df_null.dtypes == [pl.Null]  # type: ignore[union-attr]
    assert df_null.rows() == [(None,), (None,)]  # type: ignore[union-attr]

    assert (
        str(df_null) == "shape: (2, 1)\n"
        "┌──────┐\n"
        "│ a    │\n"
        "│ ---  │\n"
        "│ null │\n"
        "╞══════╡\n"
        "│ null │\n"
        "│ null │\n"
        "└──────┘"
    )
705

706

707
def test_init_arrow() -> None:
    """Build frames from pyarrow tables: unnamed columns, renames, casts, errors."""
    # Handle unnamed column
    df = pl.DataFrame(pa.table({"a": [1, 2], None: [3, 4]}))
    expected = pl.DataFrame({"a": [1, 2], "None": [3, 4]})
    assert_frame_equal(df, expected)

    # Rename columns
    df = pl.DataFrame(pa.table({"a": [1, 2], "b": [3, 4]}), schema=["c", "d"])
    expected = pl.DataFrame({"c": [1, 2], "d": [3, 4]})
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        pa.table({"a": [1, 2], None: [3, 4]}),
        schema=[("c", pl.Int32), ("d", pl.Float32)],
    )
    assert df.schema == {"c": pl.Int32, "d": pl.Float32}
    assert df.rows() == [(1, 3.0), (2, 4.0)]

    # Bad columns argument
    with pytest.raises(ValueError):
        pl.DataFrame(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}), schema=["c", "d", "e"])
728

729

730
def test_init_arrow_dupes() -> None:
    """An arrow table with duplicate column names raises ``DuplicateError``."""
    tbl = pa.Table.from_arrays(
        arrays=[
            pa.array([1, 2, 3], type=pa.int32()),
            pa.array([4, 5, 6], type=pa.int32()),
            pa.array(
                [7, 8, 9], type=pa.decimal128(38, 10)
            ),  # included as this triggers a panic during construction alongside duplicate fields
        ],
        schema=pa.schema(
            [("col", pa.int32()), ("col", pa.int32()), ("col3", pa.decimal128(38, 10))]
        ),
    )
    with pytest.raises(
        DuplicateError,
        match=r"""column appears more than once; names must be unique: \["col"\]""",
    ):
        pl.DataFrame(tbl)
748

749

750
def test_init_from_frame() -> None:
    """Build frames (and Series) from an existing DataFrame.

    Checks a plain copy, renaming through ``schema``, and a
    ``schema_overrides`` entry keyed by the original column name.
    """
    df1 = pl.DataFrame({"id": [0, 1], "misc": ["a", "b"], "val": [-10, 10]})
    assert_frame_equal(df1, pl.DataFrame(df1))

    df2 = pl.DataFrame(df1, schema=["a", "b", "c"])
    assert_frame_equal(df2, pl.DataFrame(df2))

    df3 = pl.DataFrame(df1, schema=["a", "b", "c"], schema_overrides={"val": pl.Int8})
    assert_frame_equal(df3, pl.DataFrame(df3))

    assert df1.schema == {"id": pl.Int64, "misc": pl.String, "val": pl.Int64}
    assert df2.schema == {"a": pl.Int64, "b": pl.String, "c": pl.Int64}
    assert df3.schema == {"a": pl.Int64, "b": pl.String, "c": pl.Int8}
    assert df1.rows() == df2.rows() == df3.rows()

    s1 = pl.Series("s", df3)
    s2 = pl.Series(df3)

    assert s1.name == "s"
    assert s2.name == ""
770

771

772
def test_init_series() -> None:
    """Build frames from one or more Series, plus a few Series-init cases.

    Covers lists and tuples of named and unnamed Series, schema-driven
    renaming and casting, a single Series, nested list dtypes, and building
    a Series from a one-column frame.
    """
    # List of Series
    df = pl.DataFrame([pl.Series("a", [1, 2, 3]), pl.Series("b", [4, 5, 6])])
    expected = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    assert_frame_equal(df, expected)

    # Tuple of Series
    df = pl.DataFrame((pl.Series("a", (1, 2, 3)), pl.Series("b", (4, 5, 6))))
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        (pl.Series("a", (1, 2, 3)), pl.Series("b", (4, 5, 6))),
        schema=[("x", pl.Float64), ("y", pl.Float64)],
    )
    assert df.schema == {"x": pl.Float64, "y": pl.Float64}
    assert df.rows() == [(1.0, 4.0), (2.0, 5.0), (3.0, 6.0)]

    # List of unnamed Series
    df = pl.DataFrame([pl.Series([1, 2, 3]), pl.Series([4, 5, 6])])
    col0 = pl.Series("column_0", [1, 2, 3])
    col1 = pl.Series("column_1", [4, 5, 6])
    expected = pl.DataFrame([col0, col1])
    assert_frame_equal(df, expected)

    df = pl.DataFrame([pl.Series([0.0]), pl.Series([1.0])])
    assert df.schema == {"column_0": pl.Float64, "column_1": pl.Float64}
    assert df.rows() == [(0.0, 1.0)]

    df = pl.DataFrame(
        [pl.Series([None]), pl.Series([1.0])],
        schema=[("x", pl.Date), ("y", pl.Boolean)],
    )
    assert df.schema == {"x": pl.Date, "y": pl.Boolean}
    assert df.rows() == [(None, True)]

    # Single Series
    df = pl.DataFrame(pl.Series("a", [1, 2, 3]))
    expected = pl.DataFrame({"a": [1, 2, 3]})
    assert df.schema == {"a": pl.Int64}
    assert_frame_equal(df, expected)

    df = pl.DataFrame(pl.Series("a", [1, 2, 3]), schema=[("a", pl.UInt32)])
    assert df.rows() == [(1,), (2,), (3,)]
    assert df.schema == {"a": pl.UInt32}

    # nested list, with/without explicit dtype
    s1 = pl.Series([[[2, 2]]])
    assert s1.dtype == pl.List(pl.List(pl.Int64))

    s2 = pl.Series([[[2, 2]]], dtype=pl.List(pl.List(pl.UInt8)))
    assert s2.dtype == pl.List(pl.List(pl.UInt8))

    nested_dtype = pl.List(pl.List(pl.UInt8))
    s3 = pl.Series("x", dtype=nested_dtype)
    s4 = pl.Series(s3)
    for s in (s3, s4):
        assert s.dtype == nested_dtype
        assert s.to_list() == []
        assert s.name == "x"

    # `df` is still the single-column UInt32 frame built above
    s5 = pl.Series("", df, dtype=pl.Int8)
    assert_series_equal(s5, pl.Series("", [1, 2, 3], dtype=pl.Int8))
834

835

836
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        (int, pl.Int64),
        (bytes, pl.Binary),
        (float, pl.Float64),
        (str, pl.String),
        (date, pl.Date),
        (time, pl.Time),
        (datetime, pl.Datetime("us")),
        (timedelta, pl.Duration("us")),
        (Decimal, pl.Decimal(precision=None, scale=0)),
    ],
)
def test_init_py_dtype(dtype: Any, expected_dtype: PolarsDataType) -> None:
    """Python types given as dtypes map to the expected polars dtypes.

    Checked on Series and DataFrame input holding a single null or no values.
    """
    for s in (
        pl.Series("s", [None], dtype=dtype),
        pl.Series("s", [], dtype=dtype),
    ):
        assert s.dtype == expected_dtype

    for df in (
        pl.DataFrame({"col": [None]}, schema={"col": dtype}),
        pl.DataFrame({"col": []}, schema={"col": dtype}),
    ):
        assert df.schema == {"col": expected_dtype}
862

863

864
def test_init_py_dtype_misc_float() -> None:
    """Python ``float`` as dtype yields ``Float64`` for float, int and null input."""
    assert pl.Series([100], dtype=float).dtype == pl.Float64  # type: ignore[arg-type]

    df = pl.DataFrame(
        {"x": [100.0], "y": [200], "z": [None]},
        schema={"x": float, "y": float, "z": float},
    )
    assert df.schema == {"x": pl.Float64, "y": pl.Float64, "z": pl.Float64}
    assert df.rows() == [(100.0, 200.0, None)]
873

874

875
def test_init_seq_of_seq() -> None:
    """Check DataFrame construction from sequences of sequences."""
    # List of lists
    df = pl.DataFrame([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"], orient="row")
    expected = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        [[1, 2, 3], [4, 5, 6]],
        schema=[("a", pl.Int8), ("b", pl.Int16), ("c", pl.Int32)],
        orient="row",
    )
    assert df.schema == {"a": pl.Int8, "b": pl.Int16, "c": pl.Int32}
    assert df.rows() == [(1, 2, 3), (4, 5, 6)]

    # Tuple of tuples, default to column orientation
    df = pl.DataFrame(((1, 2, 3), (4, 5, 6)))
    expected = pl.DataFrame({"column_0": [1, 2, 3], "column_1": [4, 5, 6]})
    assert_frame_equal(df, expected)

    # Row orientation
    df = pl.DataFrame(((1, 2), (3, 4)), schema=("a", "b"), orient="row")
    expected = pl.DataFrame({"a": [1, 3], "b": [2, 4]})
    assert_frame_equal(df, expected)

    df = pl.DataFrame(
        ((1, 2), (3, 4)), schema=(("a", pl.Float32), ("b", pl.Float32)), orient="row"
    )
    assert df.schema == {"a": pl.Float32, "b": pl.Float32}
    assert df.rows() == [(1.0, 2.0), (3.0, 4.0)]

    # Wrong orient value; the result is never bound because the call must raise
    with pytest.raises(ValueError):
        pl.DataFrame(((1, 2), (3, 4)), orient="wrong")  # type: ignore[arg-type]
908

909

910
def test_init_1d_sequence() -> None:
    """Check DataFrame construction from one-dimensional sequences."""
    # Empty list
    df = pl.DataFrame([])
    assert_frame_equal(df, pl.DataFrame())

    # List/array of strings
    data = ["a", "b", "c"]
    for a in (data, np.array(data)):
        df = pl.DataFrame(a, schema=["s"])
        expected = pl.DataFrame({"s": data})
        assert_frame_equal(df, expected)

    # Booleans (and a null) loaded into an integer column
    df = pl.DataFrame([None, True, False], schema=[("xx", pl.Int8)])
    assert df.schema == {"xx": pl.Int8}
    assert df.rows() == [(None,), (1,), (0,)]

    # String sequence
    result = pl.DataFrame("abc", schema=["s"])
    expected = pl.DataFrame({"s": ["a", "b", "c"]})
    assert_frame_equal(result, expected)

    # datetimes sequence: naive input keeps the naive schema dtype
    df = pl.DataFrame([datetime(2020, 1, 1)], schema={"ts": pl.Datetime("ms")})
    assert df.schema == {"ts": pl.Datetime("ms")}

    # tz-aware input: the resulting dtype carries a time zone even though the
    # declared schema dtype has none
    df = pl.DataFrame(
        [datetime(2020, 1, 1, tzinfo=timezone.utc)], schema={"ts": pl.Datetime("ms")}
    )
    assert df.schema == {"ts": pl.Datetime("ms", "UTC")}

    # a fixed-offset tzinfo is expected to come out as UTC
    df = pl.DataFrame(
        [datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
        schema={"ts": pl.Datetime("ms")},
    )
    assert df.schema == {"ts": pl.Datetime("ms", "UTC")}

    # a named zone is expected to be preserved
    df = pl.DataFrame(
        [datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
        schema={"ts": pl.Datetime("ms")},
    )
    assert df.schema == {"ts": pl.Datetime("ms", "Asia/Kathmandu")}
948

949

950
def test_init_pandas(monkeypatch: Any) -> None:
    """Check DataFrame construction from pandas frames and Series subclasses."""
    pandas_df = pd.DataFrame([[1, 2], [3, 4]], columns=[1, 2])

    # integer column names
    df = pl.DataFrame(pandas_df)
    expected = pl.DataFrame({"1": [1, 3], "2": [2, 4]})
    assert_frame_equal(df, expected)
    assert df.schema == {"1": pl.Int64, "2": pl.Int64}

    # override column names, types
    df = pl.DataFrame(pandas_df, schema=[("x", pl.Float64), ("y", pl.Float64)])
    assert df.schema == {"x": pl.Float64, "y": pl.Float64}
    assert df.rows() == [(1.0, 2.0), (3.0, 4.0)]

    # subclassed pandas object, with/without data & overrides
    # type error fixed in pandas-stubs 2.3.0.250703, which doesn't support Python3.9
    class XSeries(pd.Series):  # type: ignore[type-arg, unused-ignore]
        @property
        def _constructor(self) -> type:
            return XSeries

    df = pl.DataFrame(
        data=[
            XSeries(name="x", data=[], dtype=np.dtype("<M8[ns]")),
            XSeries(name="y", data=[], dtype=np.dtype("f8")),
            XSeries(name="z", data=[], dtype=np.dtype("?")),
        ],
    )
    assert df.schema == {"x": pl.Datetime("ns"), "y": pl.Float64, "z": pl.Boolean}
    assert df.rows() == []

    df = pl.DataFrame(
        data=[
            XSeries(
                name="x",
                data=[datetime(2022, 10, 31, 10, 30, 45, 123456)],
                dtype=np.dtype("<M8[ns]"),
            )
        ],
        schema={"colx": pl.Datetime("us")},
    )
    assert df.schema == {"colx": pl.Datetime("us")}
    assert df.rows() == [(datetime(2022, 10, 31, 10, 30, 45, 123456),)]

    # pandas is not available
    monkeypatch.setattr(pl.dataframe.frame, "_check_for_pandas", lambda x: False)

    # pandas 2.2 and higher implement the Arrow PyCapsule Interface, so the constructor
    # will still work even without using pandas APIs
    if parse_version(pd.__version__) >= parse_version("2.2.0"):
        df = pl.DataFrame(pandas_df)
        assert_frame_equal(df, expected)
    else:
        with pytest.raises(TypeError):
            pl.DataFrame(pandas_df)
1006

1007

1008
def test_init_errors() -> None:
    """Check that invalid DataFrame constructor input raises the expected errors."""
    # Length mismatch
    with pytest.raises(ShapeError):
        pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0, 4.0]})

    # Columns don't match data dimensions
    with pytest.raises(ShapeError):
        pl.DataFrame([[1, 2], [3, 4]], schema=["a", "b", "c"])

    # Unmatched input
    with pytest.raises(TypeError):
        pl.DataFrame(0)
1020

1021

1022
def test_init_records() -> None:
    """Check DataFrame construction from lists of dicts, with/without a schema."""
    dicts = [
        {"a": 1, "b": 2},
        {"b": 1, "a": 2},
        {"a": 1, "b": 2},
    ]
    df = pl.DataFrame(dicts)
    expected = pl.DataFrame({"a": [1, 2, 1], "b": [2, 1, 2]})
    assert_frame_equal(df, expected)
    assert df.to_dicts() == dicts

    # schema names that are absent from the records come back as all-null columns
    df_cd = pl.DataFrame(dicts, schema=["a", "c", "d"])
    expected_values = {
        "a": [1, 2, 1],
        "c": [None, None, None],
        "d": [None, None, None],
    }
    assert df_cd.to_dict(as_series=False) == expected_values

    data = {"a": 1, "b": 2, "c": 3}

    df1 = pl.from_dicts([data])
    assert df1.columns == ["a", "b", "c"]

    df1.columns = ["x", "y", "z"]
    assert df1.columns == ["x", "y", "z"]

    # the schema decides the column order, not the dict key order
    df2 = pl.from_dicts([data], schema=["c", "b", "a"])
    assert df2.columns == ["c", "b", "a"]

    # a single-name schema loads only that key
    for colname in ("c", "b", "a"):
        result = pl.from_dicts([data], schema=[colname])
        expected_values = {colname: [data[colname]]}
        assert result.to_dict(as_series=False) == expected_values
1056

1057

1058
def test_init_records_schema_order() -> None:
    """Check that dict records are loaded according to the declared schema order."""
    cols: list[str] = ["a", "b", "c", "d"]
    data: list[dict[str, int]] = [
        {"c": 3, "b": 2, "a": 1},
        {"b": 2, "d": 4},
        {},
        {"a": 1, "b": 2, "c": 3},
        {"d": 4, "b": 2, "a": 1},
        {"c": 3, "b": 2},
    ]
    # the only non-null value each column may hold ("e" never appears in the data)
    lookup = {"a": 1, "b": 2, "c": 3, "d": 4, "e": None}

    for constructor in (pl.from_dicts, pl.DataFrame):
        # ensure field values are loaded according to the declared schema order
        for _ in range(8):
            shuffle(data)
            shuffle(cols)

            df = constructor(data, schema=cols)
            for col in df.columns:
                assert all(value in (None, lookup[col]) for value in df[col].to_list())

        # have schema override inferred types, omit some columns, add a new one
        schema = {"a": pl.Int8, "c": pl.Int16, "e": pl.Int32}
        df = constructor(data, schema=schema)

        assert df.schema == schema
        for col in df.columns:
            assert all(value in (None, lookup[col]) for value in df[col].to_list())
1087

1088

1089
def test_init_only_columns() -> None:
    """Check construction of empty frames from a schema with no/empty data."""
    df = pl.DataFrame(schema=["a", "b", "c"])
    expected = pl.DataFrame({"a": [], "b": [], "c": []})
    assert_frame_equal(df, expected)

    # Validate construction with various flavours of no/empty data
    no_data: Any
    for no_data in (None, {}, []):
        df = pl.DataFrame(
            data=no_data,
            schema=[
                ("a", pl.Date),
                ("b", pl.UInt64),
                ("c", pl.Int8),
                ("d", pl.List(pl.UInt8)),
            ],
        )
        expected = pl.DataFrame({"a": [], "b": [], "c": []}).with_columns(
            pl.col("a").cast(pl.Date),
            pl.col("b").cast(pl.UInt64),
            pl.col("c").cast(pl.Int8),
        )
        expected.insert_column(3, pl.Series("d", [], pl.List(pl.UInt8)))

        assert df.shape == (0, 4)
        assert_frame_equal(df, expected)
        assert df.dtypes == [pl.Date, pl.UInt64, pl.Int8, pl.List]
        assert pl.List(pl.UInt8).is_(df.schema["d"])

        # static-typing check only; never executed at runtime
        if TYPE_CHECKING:
            assert_type(pl.List(pl.UInt8).is_(df.schema["d"]), bool)

        # clearing an already-empty frame keeps its schema and shape
        dfe = df.clear()
        assert len(dfe) == 0
        assert df.schema == dfe.schema
        assert dfe.shape == df.shape
1125

1126

1127
def test_from_dicts_list_without_dtype() -> None:
    """Check `from_dicts` with a list field whose second record holds only a null."""
    result = pl.from_dicts(
        [{"id": 1, "hint": ["some_text_here"]}, {"id": 2, "hint": [None]}]
    )
    expected = pl.DataFrame({"id": [1, 2], "hint": [["some_text_here"], [None]]})
    assert_frame_equal(result, expected)
1133

1134

1135
def test_from_dicts_list_struct_without_inner_dtype() -> None:
    """Check round-tripping of list-of-struct data where some struct values are null."""
    df = pl.DataFrame(
        {
            "users": [
                [{"category": "A"}, {"category": "B"}],
                [{"category": None}, {"category": None}],
            ],
            "days_of_week": [1, 2],
        }
    )
    expected = {
        "users": [
            [{"category": "A"}, {"category": "B"}],
            [{"category": None}, {"category": None}],
        ],
        "days_of_week": [1, 2],
    }
    assert df.to_dict(as_series=False) == expected
1153

1154

1155
def test_from_dicts_list_struct_without_inner_dtype_5611() -> None:
    """Check `from_dicts` when an empty list precedes a list of structs."""
    result = pl.from_dicts(
        [
            {"a": []},
            {"a": [{"b": 1}]},
        ]
    )
    expected = pl.DataFrame({"a": [[], [{"b": 1}]]})
    assert_frame_equal(result, expected)
1164

1165

1166
def test_from_dict_upcast_primitive() -> None:
    """Check that mixed int/float columns become Float64 with `strict=False`."""
    df = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]}, strict=False)
    assert df.dtypes == [pl.Float64, pl.Float64]
1169

1170

1171
def test_u64_lit_5031() -> None:
    """Check filtering a UInt64 column against a literal close to 2**64."""
    df = pl.DataFrame({"foo": [1, 2, 3]}).with_columns(pl.col("foo").cast(pl.UInt64))
    # the literal does not fit in a signed 64-bit integer; all rows must still match
    assert df.filter(pl.col("foo") < (1 << 64) - 20).shape == (3, 1)
    assert df["foo"].to_list() == [1, 2, 3]
1175

1176

1177
def test_from_dicts_missing_columns() -> None:
    """Check `from_dicts` when records or the schema omit some keys."""
    # missing columns from some of the data dicts
    data = [{"a": 1}, {"b": 2}]
    result = pl.from_dicts(data)
    expected = pl.DataFrame({"a": [1, None], "b": [None, 2]})
    assert_frame_equal(result, expected)

    # partial schema with some columns missing; only load the declared keys
    data = [{"a": 1, "b": 2}]
    result = pl.from_dicts(data, schema=["a"])
    expected = pl.DataFrame({"a": [1]})
    assert_frame_equal(result, expected)
1189

1190

1191
def test_from_dicts_schema_columns_do_not_match() -> None:
    """Check that a schema name absent from every record gives an all-null column."""
    data = [{"a": 1, "b": 2}]
    result = pl.from_dicts(data, schema=["x"])
    expected = pl.DataFrame({"x": [None]})
    assert_frame_equal(result, expected)
1196

1197

1198
def test_from_dicts_infer_integer_types() -> None:
    """Check integer dtype inference in `from_dicts` across value magnitudes."""
    data = [
        {
            "a": 2**7 - 1,
            "b": 2**15 - 1,
            "c": 2**31 - 1,
            "d": 2**63 - 1,
            "e": 2**127 - 1,
        }
    ]
    result = pl.from_dicts(data).schema
    # all values inferred as i64 except for values too large for i64
    expected = {
        "a": pl.Int64,
        "b": pl.Int64,
        "c": pl.Int64,
        "d": pl.Int64,
        "e": pl.Int128,
    }
    assert result == expected

    # one past the largest value accepted above must be rejected
    with pytest.raises(OverflowError):
        pl.from_dicts([{"too_big": 2**127}])
1221

1222

1223
def test_from_dicts_list_large_int_17006() -> None:
    """Check `from_dicts` with the maximum UInt64 value nested in List/Array columns."""
    data = [{"x": [2**64 - 1]}]

    result = pl.from_dicts(data, schema={"x": pl.List(pl.UInt64)})
    expected = pl.DataFrame({"x": [[2**64 - 1]]}, schema={"x": pl.List(pl.UInt64)})
    assert_frame_equal(result, expected)

    result = pl.from_dicts(data, schema={"x": pl.Array(pl.UInt64, 1)})
    expected = pl.DataFrame({"x": [[2**64 - 1]]}, schema={"x": pl.Array(pl.UInt64, 1)})
    assert_frame_equal(result, expected)
1233

1234

1235
def test_from_rows_dtype() -> None:
    """Check that declared dtypes are honoured when data exceeds 50 leading rows."""
    # 50 is the default inference length
    # 5182
    df = pl.DataFrame(
        data=[(None, None)] * 50 + [("1.23", None)],
        schema=[("foo", pl.String), ("bar", pl.String)],
        orient="row",
    )
    assert df.dtypes == [pl.String, pl.String]
    assert df.null_count().row(0) == (50, 51)

    # 50 is the default inference length
    # 5266
    # (the record whose nested dict has different keys comes after 50 others)
    type1 = [{"c1": 206, "c2": "type1", "c3": {"x1": "abcd", "x2": "jkl;"}}]
    type2 = [
        {"c1": 208, "c2": "type2", "c3": {"a1": "abcd", "a2": "jkl;", "a3": "qwerty"}}
    ]

    df = pl.DataFrame(
        data=type1 * 50 + type2,
        schema=[("c1", pl.Int32), ("c2", pl.Object), ("c3", pl.Object)],
    )
    assert df.dtypes == [pl.Int32, pl.Object, pl.Object]
    assert df.null_count().row(0) == (0, 0, 0)

    # a dataclass instance stored as-is in an Object column
    dc = _TestBazDC(d=datetime(2020, 2, 22), e=42.0, f="xyz")
    df = pl.DataFrame([[dc]], schema={"d": pl.Object})
    assert df.schema == {"d": pl.Object}
    assert df.item() == dc
1275

1276

1277
def test_from_dicts_schema() -> None:
    """Check `from_dicts`/`DataFrame` with partial schemas and schema overrides."""
    data = [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}]

    # let polars infer the dtypes, but inform it about a 3rd column.
    for schema, overrides in (
        ({"a": pl.Unknown, "b": pl.Unknown, "c": pl.Int32}, None),
        ({"a": None, "b": None, "c": None}, {"c": pl.Int32}),
        (["a", "b", ("c", pl.Int32)], None),
    ):
        df = pl.from_dicts(
            data,
            schema=schema,  # type: ignore[arg-type]
            schema_overrides=overrides,
        )
        assert df.dtypes == [pl.Int64, pl.Int64, pl.Int32]
        assert df.to_dict(as_series=False) == {
            "a": [1, 2, 3],
            "b": [4, 5, 6],
            "c": [None, None, None],
        }

    # provide data that resolves to an empty frame (ref: scalar
    # expansion shortcut), with schema/override hints
    schema = {"colx": pl.String, "coly": pl.Int32}

    for param in ("schema", "schema_overrides"):
        df = pl.DataFrame({"colx": [], "coly": 0}, **{param: schema})  # type: ignore[arg-type]
        assert df.schema == schema
1305

1306

1307
def test_nested_read_dicts_4143() -> None:
    """Check `from_dicts` on list-of-struct records with null struct fields."""
    result = pl.from_dicts(
        [
            {
                "id": 1,
                "hint": [
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                ],
            },
            {
                "id": 2,
                "hint": [
                    {"some_text_here": None, "list_": [1]},
                    {"some_text_here": None, "list_": [2]},
                ],
            },
        ]
    )
    expected = {
        "hint": [
            [
                {"some_text_here": "text", "list_": [1, 2, 4]},
                {"some_text_here": "text", "list_": [1, 2, 4]},
            ],
            [
                {"some_text_here": None, "list_": [1]},
                {"some_text_here": None, "list_": [2]},
            ],
        ],
        "id": [1, 2],
    }
    assert result.to_dict(as_series=False) == expected
1340

1341

1342
def test_nested_read_dicts_4143_2() -> None:
    """Check `from_dicts` on list-of-struct records with empty inner lists."""
    result = pl.from_dicts(
        [
            {
                "id": 1,
                "hint": [
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                    {"some_text_here": "text", "list_": [1, 2, 4]},
                ],
            },
            {
                "id": 2,
                "hint": [
                    {"some_text_here": "text", "list_": []},
                    {"some_text_here": "text", "list_": []},
                ],
            },
        ]
    )

    assert result.dtypes == [
        pl.Int64,
        pl.List(pl.Struct({"some_text_here": pl.String, "list_": pl.List(pl.Int64)})),
    ]
    expected = {
        "id": [1, 2],
        "hint": [
            [
                {"some_text_here": "text", "list_": [1, 2, 4]},
                {"some_text_here": "text", "list_": [1, 2, 4]},
            ],
            [
                {"some_text_here": "text", "list_": []},
                {"some_text_here": "text", "list_": []},
            ],
        ],
    }
    assert result.to_dict(as_series=False) == expected
1380

1381

1382
def test_from_records_nullable_structs() -> None:
    """Check record construction with list-of-struct fields that contain nulls."""
    records = [
        {"id": 1, "items": [{"item_id": 100, "description": None}]},
        {"id": 1, "items": [{"item_id": 100, "description": "hi"}]},
    ]

    schema: list[tuple[str, PolarsDataType]] = [
        ("id", pl.UInt16),
        (
            "items",
            pl.List(
                pl.Struct(
                    [pl.Field("item_id", pl.UInt32), pl.Field("description", pl.String)]
                )
            ),
        ),
    ]

    # the values must be identical with an explicit schema and with inference
    schema_options: list[list[tuple[str, PolarsDataType]] | None] = [schema, None]
    for s in schema_options:
        result = pl.DataFrame(records, schema=s, orient="row")
        expected = {
            "id": [1, 1],
            "items": [
                [{"item_id": 100, "description": None}],
                [{"item_id": 100, "description": "hi"}],
            ],
        }
        assert result.to_dict(as_series=False) == expected

    # check initialisation without any records
    df = pl.DataFrame(schema=schema)
    dict_schema = dict(schema)
    assert df.to_dict(as_series=False) == {"id": [], "items": []}
    assert df.schema == dict_schema

    dtype: PolarsDataType = dict_schema["items"]
    series = pl.Series("items", dtype=dtype)
    assert series.to_frame().to_dict(as_series=False) == {"items": []}
    assert series.dtype == dict_schema["items"]
    assert series.to_list() == []
1423

1424

1425
@pytest.mark.parametrize("unnest_column", ["a", pl.col("a"), cs.by_name("a")])
def test_from_categorical_in_struct_defined_by_schema(unnest_column: Any) -> None:
    """Check unnesting a struct column whose schema declares a Categorical field."""
    df = pl.DataFrame(
        {"a": [{"value": "foo", "counts": 1}, {"value": "bar", "counts": 2}]},
        schema={"a": pl.Struct({"value": pl.Categorical, "counts": pl.UInt32})},
    )

    expected = pl.DataFrame(
        {"value": ["foo", "bar"], "counts": [1, 2]},
        schema={"value": pl.Categorical, "counts": pl.UInt32},
    )

    # eager and lazy unnest must agree with the directly constructed frame
    res_eager = df.unnest(unnest_column)
    assert_frame_equal(res_eager, expected, categorical_as_str=True)

    res_lazy = df.lazy().unnest(unnest_column)
    assert_frame_equal(res_lazy.collect(), expected, categorical_as_str=True)
1442

1443

1444
def test_nested_schema_construction() -> None:
    """Check construction against a deeply nested List/Struct schema."""
    schema = {
        "node_groups": pl.List(
            pl.Struct(
                [
                    pl.Field("parent_node_group_id", pl.UInt8),
                    pl.Field(
                        "nodes",
                        pl.List(
                            pl.Struct(
                                [
                                    pl.Field("name", pl.String),
                                    pl.Field(
                                        "sub_nodes",
                                        pl.List(
                                            pl.Struct(
                                                [
                                                    pl.Field("internal_id", pl.UInt64),
                                                    pl.Field("value", pl.UInt32),
                                                ]
                                            )
                                        ),
                                    ),
                                ]
                            )
                        ),
                    ),
                ]
            )
        )
    }
    # the input omits "parent_node_group_id"; it is expected back as None
    df = pl.DataFrame(
        {
            "node_groups": [
                [{"nodes": []}, {"nodes": [{"name": "", "sub_nodes": []}]}],
            ]
        },
        schema=schema,
    )

    assert df.schema == schema
    assert df.to_dict(as_series=False) == {
        "node_groups": [
            [
                {"parent_node_group_id": None, "nodes": []},
                {
                    "parent_node_group_id": None,
                    "nodes": [{"name": "", "sub_nodes": []}],
                },
            ]
        ]
    }
1496

1497

1498
def test_nested_schema_construction2() -> None:
    """Check nested-schema construction from records, one with an empty inner list."""
    schema = {
        "node_groups": pl.List(
            pl.Struct(
                [
                    pl.Field(
                        "nodes",
                        pl.List(
                            pl.Struct(
                                [
                                    pl.Field("name", pl.String),
                                    pl.Field("time", pl.UInt32),
                                ]
                            )
                        ),
                    )
                ]
            )
        )
    }
    df = pl.DataFrame(
        [
            {"node_groups": [{"nodes": [{"name": "a", "time": 0}]}]},
            {"node_groups": [{"nodes": []}]},
        ],
        schema=schema,
    )
    assert df.schema == schema
    assert df.to_dict(as_series=False) == {
        "node_groups": [[{"nodes": [{"name": "a", "time": 0}]}], [{"nodes": []}]]
    }
1529

1530

1531
def test_arrow_to_pyseries_with_one_chunk_does_not_copy_data() -> None:
    """Check that a single-chunk Arrow array is not copied by `arrow_to_pyseries`."""
    from polars._utils.construction import arrow_to_pyseries

    original_array = pa.chunked_array([[1, 2, 3]], type=pa.int64())
    pyseries = arrow_to_pyseries("", original_array)
    # compare the first item of the series chunk's buffer info with the address
    # of the Arrow chunk's buffer at index 1
    assert (
        pyseries.get_chunks()[0]._get_buffer_info()[0]
        == original_array.chunks[0].buffers()[1].address
    )
1540

1541

1542
def test_init_with_explicit_binary_schema() -> None:
    """Check DataFrame and Series construction with an explicit Binary dtype."""
    df = pl.DataFrame({"a": [b"hello", b"world"]}, schema={"a": pl.Binary})
    assert df.schema == {"a": pl.Binary}
    assert df["a"].to_list() == [b"hello", b"world"]

    s = pl.Series("a", [b"hello", b"world"], dtype=pl.Binary)
    assert s.dtype == pl.Binary
    assert s.to_list() == [b"hello", b"world"]
1550

1551

1552
def test_nested_categorical() -> None:
    """Check Series construction with a List(Categorical) dtype."""
    s = pl.Series([["a"]], dtype=pl.List(pl.Categorical))
    assert s.to_list() == [["a"]]
    assert s.dtype == pl.List(pl.Categorical)
1556

1557

1558
def test_datetime_date_subclasses() -> None:
    """Check that subclasses of `date`/`datetime` load like their base types."""

    class FakeDate(date): ...

    class FakeDateChild(FakeDate): ...

    class FakeDatetime(FakeDate, datetime): ...

    # direct subclass of date
    result = pl.Series([FakeDate(2020, 1, 1)])
    expected = pl.Series([date(2020, 1, 1)])
    assert_series_equal(result, expected)

    # subclass of a subclass of date
    result = pl.Series([FakeDateChild(2020, 1, 1)])
    expected = pl.Series([date(2020, 1, 1)])
    assert_series_equal(result, expected)

    # class inheriting from both the date subclass and datetime
    result = pl.Series([FakeDatetime(2020, 1, 1, 3)])
    expected = pl.Series([datetime(2020, 1, 1, 3)])
    assert_series_equal(result, expected)
1576

1577

1578
def test_list_null_constructor() -> None:
    """Check Series construction for List(Null) and nested lists with empty items."""
    s = pl.Series("a", [[None], [None]], dtype=pl.List(pl.Null))
    assert s.dtype == pl.List(pl.Null)
    assert s.to_list() == [[None], [None]]

    # nested
    dtype = pl.List(pl.List(pl.Int8))
    values = [
        [],
        [[], []],
        [[33, 112]],
    ]
    s = pl.Series(
        name="colx",
        values=values,
        dtype=dtype,
    )
    assert s.dtype == dtype
    assert s.to_list() == values

    # nested
    # small order change has influence
    dtype = pl.List(pl.List(pl.Int8))
    values = [
        [[], []],
        [],
        [[33, 112]],
    ]
    s = pl.Series(
        name="colx",
        values=values,
        dtype=dtype,
    )
    assert s.dtype == dtype
    assert s.to_list() == values
1613

1614

1615
def test_numpy_float_construction_av() -> None:
    """Check that a numpy float scalar as a dict value matches a Python float."""
    np_dict = {"a": np.float64(1)}
    assert_frame_equal(pl.DataFrame(np_dict), pl.DataFrame({"a": 1.0}))
1618

1619

1620
def test_df_init_dict_raise_on_expression_input() -> None:
    """Check that Expr objects passed as dict values are rejected with TypeError."""
    with pytest.raises(
        TypeError,
        match="passing Expr objects to the DataFrame constructor is not supported",
    ):
        pl.DataFrame({"a": pl.int_range(0, 3)})
    with pytest.raises(TypeError):
        pl.DataFrame({"a": pl.int_range(0, 3), "b": [3, 4, 5]})

    # Passing a list of expressions is allowed
    df = pl.DataFrame({"a": [pl.int_range(0, 3)]})
    assert df.get_column("a").dtype.is_object()
1632

1633

1634
def test_df_schema_sequences() -> None:
    """Check that a schema given as a list of [name, dtype] lists is accepted."""
    schema = [
        ["address", pl.String],
        ["key", pl.Int64],
        ["value", pl.Float32],
    ]
    df = pl.DataFrame(schema=schema)  # type: ignore[arg-type]
    assert df.schema == {"address": pl.String, "key": pl.Int64, "value": pl.Float32}
1642

1643

1644
def test_df_schema_sequences_incorrect_length() -> None:
    """Check that a schema entry with more than two items raises ValueError."""
    schema = [
        ["address", pl.String, pl.Int8],  # three items instead of [name, dtype]
        ["key", pl.Int64],
        ["value", pl.Float32],
    ]
    with pytest.raises(ValueError):
        pl.DataFrame(schema=schema)  # type: ignore[arg-type]
1652

1653

1654
@pytest.mark.parametrize(
    ("input", "infer_func", "expected_dtype"),
    [
        ("f8", numpy_char_code_to_dtype, pl.Float64),
        ("f4", numpy_char_code_to_dtype, pl.Float32),
        ("i4", numpy_char_code_to_dtype, pl.Int32),
        ("u1", numpy_char_code_to_dtype, pl.UInt8),
        ("?", numpy_char_code_to_dtype, pl.Boolean),
        ("m8", numpy_char_code_to_dtype, pl.Duration("us")),
        ("M8", numpy_char_code_to_dtype, pl.Datetime("us")),
    ],
)
def test_numpy_inference(
    input: Any,
    infer_func: Callable[[Any], PolarsDataType],
    expected_dtype: PolarsDataType,
) -> None:
    """Check that `infer_func` maps each numpy char code to the expected dtype."""
    result = infer_func(input)
    assert result == expected_dtype
1673

1674

1675
def test_array_construction() -> None:
    """Check Array-dtype construction for Series, empty frames and `from_dicts`."""
    payload = [[1, 2, 3], None, [4, 2, 3]]

    dtype = pl.Array(pl.Int64, 3)
    s = pl.Series(payload, dtype=dtype)
    assert s.dtype == dtype
    assert s.to_list() == payload

    # inner type
    dtype = pl.Array(pl.UInt8, 2)
    payload = [[1, 2], None, [3, 4]]
    s = pl.Series(payload, dtype=dtype)
    assert s.dtype == dtype
    assert s.to_list() == payload

    # create using schema
    df = pl.DataFrame(
        schema={
            "a": pl.Array(pl.Float32, 3),
            "b": pl.Array(pl.Datetime("ms"), 5),
        }
    )
    assert df.dtypes == [
        pl.Array(pl.Float32, 3),
        pl.Array(pl.Datetime("ms"), 5),
    ]
    assert df.rows() == []

    # from dicts
    rows = [
        {"row_id": "a", "data": [1, 2, 3]},
        {"row_id": "b", "data": [2, 3, 4]},
    ]
    schema = {"row_id": pl.String(), "data": pl.Array(inner=pl.Int64, shape=3)}
    df = pl.from_dicts(rows, schema=schema)
    assert df.schema == schema
    assert df.rows() == [("a", [1, 2, 3]), ("b", [2, 3, 4])]
1712

1713

1714
@pytest.mark.may_fail_auto_streaming
def test_pycapsule_interface(df: pl.DataFrame) -> None:
    """Round-trip Series and DataFrames through the PyCapsule holder wrappers.

    pyarrow objects derived from the ``df`` fixture are wrapped in a
    ``PyCapsuleArrayHolder`` or ``PyCapsuleStreamHolder``, passed to the
    polars constructors, and the result is compared with the original data.
    """
    frame = df.rechunk()
    arrow_table = frame.to_arrow()
    bools = frame["bools"]

    # C data interface: a single Array
    arrow_array = arrow_table["bools"].chunk(0)
    series_back = pl.Series(PyCapsuleArrayHolder(arrow_array))
    assert bools.equals(series_back, check_dtypes=True, check_names=False)

    # C data interface: an empty Array of the same type
    arrow_array_empty = pa.array([], type=arrow_array.type)
    series_back = pl.Series(PyCapsuleArrayHolder(arrow_array_empty))
    assert bools.dtype == series_back.dtype

    # C array interface: a RecordBatch
    first_batch = arrow_table.to_batches()[0]
    frame_back = pl.DataFrame(PyCapsuleArrayHolder(first_batch))
    assert frame.equals(frame_back)

    # C stream interface: a ChunkedArray
    arrow_chunked = arrow_table["bools"]
    series_back = pl.Series(PyCapsuleStreamHolder(arrow_chunked))
    assert bools.equals(series_back, check_dtypes=True, check_names=False)

    # C stream interface: an empty ChunkedArray of the same type
    arrow_chunked_empty = pa.chunked_array([], type=arrow_chunked.type)
    series_back = pl.Series(PyCapsuleStreamHolder(arrow_chunked_empty))
    assert bools.dtype == series_back.dtype

    # C stream interface: a Table
    frame_back = pl.DataFrame(PyCapsuleStreamHolder(arrow_table))
    assert frame.equals(frame_back)

    # C stream interface: an empty Table
    arrow_table_empty = frame[:0].to_arrow()
    frame_back = pl.DataFrame(PyCapsuleStreamHolder(arrow_table_empty))
    expected_schema = frame.schema
    actual_schema = frame_back.schema

    # The "enum" schema is not preserved because categories are lost via C data
    # interface, so that column is left out of the comparison on both sides
    for schema in (expected_schema, actual_schema):
        schema.pop("enum")

    assert expected_schema == actual_schema

    # C stream interface: a RecordBatchReader
    reader = pa.RecordBatchReader.from_batches(
        arrow_table.schema, arrow_table.to_batches()
    )
    frame_back = pl.DataFrame(PyCapsuleStreamHolder(reader))
    assert frame.equals(frame_back)
1767

1768

1769
@pytest.mark.parametrize(
    "tz",
    [
        None,
        ZoneInfo("Asia/Tokyo"),
        ZoneInfo("Europe/Amsterdam"),
        ZoneInfo("UTC"),
        timezone.utc,
    ],
)
def test_init_list_of_dicts_with_timezone(tz: Any) -> None:
    """Row dicts holding datetimes give the same frame as column-wise input."""
    moment = datetime(2023, 1, 1, tzinfo=tz)

    from_rows = pl.DataFrame([{"dt": moment} for _ in range(2)])
    from_columns = pl.DataFrame({"dt": [moment, moment]})
    assert_frame_equal(from_rows, from_columns)

    # the time zone (or its absence) is carried into the column dtype
    expected_schema = {"dt": pl.Datetime("us", time_zone=tz)}
    assert from_rows.schema == expected_schema
1787

1788

1789
@pytest.mark.parametrize(
    "tz",
    [
        None,
        ZoneInfo("Asia/Tokyo"),
        ZoneInfo("Europe/Amsterdam"),
        ZoneInfo("UTC"),
        timezone.utc,
    ],
)
def test_init_list_of_nested_dicts_with_timezone(tz: Any) -> None:
    """Datetimes nested one dict deep survive construction and ``unnest``."""
    moment = datetime(2021, 1, 1, tzinfo=tz)
    nested_rows = [{"timestamp": {"content": moment}}]

    unnested = pl.DataFrame(nested_rows).unnest("timestamp")
    assert_frame_equal(unnested, pl.DataFrame({"content": [moment]}))

    # the time zone (or its absence) is carried into the unnested column dtype
    expected_schema = {"content": pl.Datetime("us", time_zone=tz)}
    assert unnested.schema == expected_schema
1808

1809

1810
def test_init_from_subclassed_types() -> None:
    """Values whose type subclasses a basic Python type load like the base type."""
    import codecs

    # start with a closer look at one custom ``str`` subclass...
    class SuperSecretString(str):
        def __new__(cls, value: str) -> Self:
            return super().__new__(cls, value)

        def __repr__(self) -> str:
            return codecs.encode(self, "rot_13")

    plain = "windmolen"
    secret = SuperSecretString(plain)

    assert secret == plain
    assert isinstance(secret, str)
    assert repr(secret) == "jvaqzbyra"
    assert_series_equal(pl.Series([plain, plain]), pl.Series([secret, secret]))

    # ...then repeat the comparison for subclasses of the other basic types
    samples = ((int, 42), (float, 5.5), (bytes, b"value"), (str, "value"))
    for base_type, sample in samples:

        class SubclassedType(base_type):  # type: ignore[misc,valid-type]
            def __new__(cls, value: Any) -> Self:
                return super().__new__(cls, value)  # type: ignore[no-any-return]

        from_base = pl.Series([sample]).to_list()
        from_subclass = pl.Series([SubclassedType(sample)]).to_list()
        assert from_base == from_subclass
1843
        )
1844

1845

1846
def test_series_init_with_python_type_7737() -> None:
    """Python builtin types passed as ``dtype`` map to the matching polars dtype."""
    builtin_to_polars = (
        (int, pl.Int64),
        (float, pl.Float64),
        (bool, pl.Boolean),
        (str, pl.Utf8),
    )
    for py_type, polars_dtype in builtin_to_polars:
        empty = pl.Series([], dtype=py_type)  # type: ignore[arg-type]
        assert empty.dtype == polars_dtype

    # values that do not fit the requested builtin type raise a TypeError
    mismatched = ((["a"], int), ([True], str))
    for values, py_type in mismatched:
        with pytest.raises(TypeError):
            pl.Series(values, dtype=py_type)  # type: ignore[arg-type]
1857

1858

1859
def test_init_from_list_shape_6968() -> None:
    """Three two-element lists give shape (2, 3), even with a leading ``None``."""
    inputs = (
        [[1, None], [2, None], [3, None]],
        [[None, None], [2, None], [3, None]],
    )
    # build both frames before checking either one
    frames = [pl.DataFrame(data) for data in inputs]
    for frame in frames:
        assert frame.shape == (2, 3)
1864

1865
Product

Resources

Company