CoCalc -- test_series.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/series/test_series.py
⁶⁹³⁹ views
1
from __future__ import annotations
2

3
import math
4
from datetime import date, datetime, time, timedelta
5
from typing import TYPE_CHECKING, Any, cast
6
from zoneinfo import ZoneInfo
7

8
import numpy as np
9
import pandas as pd
10
import pyarrow as pa
11
import pytest
12

13
import polars as pl
14
from polars._utils.construction import iterable_to_pyseries
15
from polars.datatypes import (
16
    Datetime,
17
    Field,
18
    Float64,
19
    Int32,
20
    Int64,
21
    Time,
22
    UInt32,
23
    UInt64,
24
    Unknown,
25
)
26
from polars.exceptions import (
27
    DuplicateError,
28
    InvalidOperationError,
29
    PolarsInefficientMapWarning,
30
    ShapeError,
31
)
32
from polars.testing import assert_frame_equal, assert_series_equal
33
from tests.unit.conftest import FLOAT_DTYPES, INTEGER_DTYPES
34
from tests.unit.utils.pycapsule_utils import PyCapsuleStreamHolder
35

36
if TYPE_CHECKING:
37
    from collections.abc import Iterator
38

39
    from polars._typing import EpochTimeUnit, PolarsDataType, TimeUnit
40

41

42
def test_cum_agg() -> None:
    """Check each cumulative aggregation against hand-computed values."""
    series = pl.Series("a", [1, 2, 3, 2])
    expected_by_method = {
        "cum_sum": [1, 3, 6, 8],
        "cum_min": [1, 1, 1, 1],
        "cum_max": [1, 2, 3, 3],
        "cum_prod": [1, 2, 6, 12],
    }
    for method_name, values in expected_by_method.items():
        result = getattr(series, method_name)()
        assert_series_equal(result, pl.Series("a", values))
49

50

51
def test_cum_agg_with_nulls() -> None:
    """Cumulative aggregations keep nulls in place and skip them in the running value."""
    series = pl.Series("a", [None, 2, None, 7, 8, None])
    cases = [
        (series.cum_sum(), [None, 2, None, 9, 17, None]),
        (series.cum_min(), [None, 2, None, 2, 2, None]),
        (series.cum_max(), [None, 2, None, 7, 8, None]),
        (series.cum_prod(), [None, 2, None, 14, 112, None]),
    ]
    for actual, values in cases:
        assert_series_equal(actual, pl.Series("a", values))
58

59

60
def test_cum_agg_with_infs() -> None:
    """A leading +inf / -inf must not stick in cum_min / cum_max respectively."""
    pos_inf = float("inf")
    neg_inf = float("-inf")

    running_min = pl.Series([pos_inf, 0.0, 1.0]).cum_min()
    assert_series_equal(running_min, pl.Series([pos_inf, 0.0, 0.0]))

    running_max = pl.Series([neg_inf, 0.0, 1.0]).cum_max()
    assert_series_equal(running_max, pl.Series([neg_inf, 0.0, 1.0]))
67

68

69
def test_cum_min_max_bool() -> None:
    """Boolean cum_min/cum_max must agree with the same operation on Int32."""
    bools = pl.Series(
        "a", [None, True, True, None, False, None, True, False, False, None]
    )
    ints = bools.cast(pl.Int32)

    # forward direction first, then reversed
    for reverse in (False, True):
        assert_series_equal(
            bools.cum_min(reverse=reverse).cast(pl.Int32),
            ints.cum_min(reverse=reverse),
        )
        assert_series_equal(
            bools.cum_max(reverse=reverse).cast(pl.Int32),
            ints.cum_max(reverse=reverse),
        )
79

80

81
def test_init_inputs(monkeypatch: Any) -> None:
    """Exercise the ``pl.Series`` constructor with accepted and rejected inputs.

    Covers positional/keyword argument forms, dtype inference for Python,
    numpy and pandas inputs, Date -> Datetime conversion, datetime64 time
    units, and a handful of inputs that must raise.
    """
    nan = float("nan")
    # Good inputs
    pl.Series("a", [1, 2])
    pl.Series("a", values=[1, 2])
    pl.Series(name="a", values=[1, 2])
    pl.Series(values=[1, 2], name="a")

    assert pl.Series([1, 2]).dtype == pl.Int64
    assert pl.Series(values=[1, 2]).dtype == pl.Int64
    assert pl.Series("a").dtype == pl.Null  # Null dtype used in case of no data
    assert pl.Series().dtype == pl.Null
    assert pl.Series([]).dtype == pl.Null
    # Null dtype is also used for a list containing only None
    assert pl.Series([None, None, None]).dtype == pl.Null
    assert pl.Series(values=[True, False]).dtype == pl.Boolean
    assert pl.Series(values=np.array([True, False])).dtype == pl.Boolean
    assert pl.Series(values=np.array(["foo", "bar"])).dtype == pl.String
    assert pl.Series(values=["foo", "bar"]).dtype == pl.String
    assert pl.Series("a", [pl.Series([1, 2, 4]), pl.Series([3, 2, 1])]).dtype == pl.List
    assert pl.Series("a", [10000, 20000, 30000], dtype=pl.Time).dtype == pl.Time

    # 2d numpy array and/or list of 1d numpy arrays
    for res in (
        pl.Series(
            name="a",
            values=np.array([[1, 2], [3, nan]], dtype=np.float32),
            nan_to_null=True,
        ),
        pl.Series(
            name="a",
            values=[
                np.array([1, 2], dtype=np.float32),
                np.array([3, nan], dtype=np.float32),
            ],
            nan_to_null=True,
        ),
        pl.Series(
            name="a",
            values=(
                np.ndarray((2,), np.float32, np.array([1, 2], dtype=np.float32)),
                np.ndarray((2,), np.float32, np.array([3, nan], dtype=np.float32)),
            ),
            nan_to_null=True,
        ),
    ):
        assert res.dtype == pl.Array(pl.Float32, shape=2)
        assert res[0].to_list() == [1.0, 2.0]
        assert res[1].to_list() == [3.0, None]

    # numpy from arange, with/without dtype
    two_ints = np.arange(2, dtype=np.int64)
    three_ints = np.arange(3, dtype=np.int64)
    for res in (
        pl.Series("a", [two_ints, three_ints]),
        pl.Series("a", [two_ints, three_ints], dtype=pl.List(pl.Int64)),
    ):
        assert res.dtype == pl.List(pl.Int64)
        assert res.to_list() == [[0, 1], [0, 1, 2]]

    assert pl.Series(
        values=np.array([["foo", "bar"], ["foo2", "bar2"]])
    ).dtype == pl.Array(pl.String, shape=2)

    # lists
    assert pl.Series("a", [[1, 2], [3, 4]]).dtype == pl.List(pl.Int64)

    # conversion of Date to Datetime
    s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime)
    assert s.to_list() == [datetime(2023, 1, 1), datetime(2023, 1, 2)]
    assert Datetime == s.dtype
    assert s.dtype.time_unit == "us"  # type: ignore[attr-defined]
    assert s.dtype.time_zone is None  # type: ignore[attr-defined]

    # conversion of Date to Datetime with specified timezone and units
    tu: TimeUnit = "ms"
    tz = "America/Argentina/Rio_Gallegos"
    s = pl.Series(
        [date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu)
    ).dt.replace_time_zone(tz)
    d1 = datetime(2023, 1, 1, 0, 0, 0, 0, ZoneInfo(tz))
    d2 = datetime(2023, 1, 2, 0, 0, 0, 0, ZoneInfo(tz))
    assert s.to_list() == [d1, d2]
    assert Datetime == s.dtype
    assert s.dtype.time_unit == tu  # type: ignore[attr-defined]
    assert s.dtype.time_zone == tz  # type: ignore[attr-defined]

    # datetime64: check timeunit (auto-detect, implicit/explicit) and NaT
    d64 = pd.date_range(date(2021, 8, 1), date(2021, 8, 3)).values
    d64[1] = None

    expected = [datetime(2021, 8, 1, 0), None, datetime(2021, 8, 3, 0)]
    for dtype in (None, Datetime, Datetime("ns")):
        s = pl.Series("dates", d64, dtype)
        assert s.to_list() == expected
        assert Datetime == s.dtype
        assert s.dtype.time_unit == "ns"  # type: ignore[attr-defined]

    s = pl.Series(values=d64.astype("<M8[ms]"))
    assert s.dtype.time_unit == "ms"  # type: ignore[attr-defined]
    assert expected == s.to_list()

    # pandas
    assert pl.Series(pd.Series([1, 2])).dtype == pl.Int64

    # Bad inputs
    with pytest.raises(TypeError):
        pl.Series([1, 2, 3], [1, 2, 3])
    with pytest.raises(TypeError):
        pl.Series({"a": [1, 2, 3]})
    with pytest.raises(OverflowError):
        pl.Series("bigint", [2**128])

    # numpy not available: force the numpy check to always report False
    monkeypatch.setattr(pl.series.series, "_check_for_numpy", lambda x: False)
    with pytest.raises(TypeError):
        pl.DataFrame(np.array([1, 2, 3]), schema=["a"])
199

200

201
def test_init_structured_objects() -> None:
    """Build a struct Series from dataclass, NamedTuple and pydantic instances."""
    from typing import NamedTuple

    from polars.dependencies import dataclasses, pydantic

    @dataclasses.dataclass
    class TeaShipmentDC:
        exporter: str
        importer: str
        product: str
        tonnes: int | None

    class TeaShipmentNT(NamedTuple):
        exporter: str
        importer: str
        product: str
        tonnes: None | int

    class TeaShipmentPD(pydantic.BaseModel):
        exporter: str
        importer: str
        product: str
        tonnes: int

    # The expectations are the same for every container type, so build them once.
    expected_fields = [
        Field("exporter", pl.String),
        Field("importer", pl.String),
        Field("product", pl.String),
        Field("tonnes", pl.Int64),
    ]
    expected_rows = [
        {
            "exporter": "Sri Lanka",
            "importer": "USA",
            "product": "Ceylon",
            "tonnes": 10,
        },
        {
            "exporter": "India",
            "importer": "UK",
            "product": "Darjeeling",
            "tonnes": 25,
        },
        {
            "exporter": "China",
            "importer": "UK",
            "product": "Keemum",
            "tonnes": 40,
        },
    ]

    for shipment_cls in (TeaShipmentDC, TeaShipmentNT, TeaShipmentPD):
        shipments = [
            shipment_cls(
                exporter="Sri Lanka", importer="USA", product="Ceylon", tonnes=10
            ),
            shipment_cls(
                exporter="India", importer="UK", product="Darjeeling", tonnes=25
            ),
            shipment_cls(exporter="China", importer="UK", product="Keemum", tonnes=40),
        ]

        s = pl.Series("t", shipments)

        assert isinstance(s, pl.Series)
        assert s.dtype.fields == expected_fields  # type: ignore[attr-defined]
        assert s.to_list() == expected_rows
        assert_frame_equal(s.to_frame(), pl.DataFrame({"t": shipments}))
261

262

263
def test_to_frame() -> None:
    """``to_frame`` keeps the series name unless an explicit name is given."""
    unnamed = pl.Series([1, 2])
    named = pl.Series("s", [1, 2])

    cases = (
        (unnamed.to_frame(), ""),
        (named.to_frame(), "s"),
        (unnamed.to_frame("xyz"), "xyz"),
        (named.to_frame("xyz"), "xyz"),
    )
    for frame, expected_name in cases:
        assert isinstance(frame, pl.DataFrame)
        assert frame.rows() == [(1,), (2,)]
        assert frame.columns == [expected_name]

    # note: the empty string IS technically a valid column name
    assert named.to_frame("").columns == [""]
    # the source series keeps its own name
    assert named.name == "s"
280

281

282
def test_bitwise_ops() -> None:
    """Check ``&``, ``|``, ``^`` and ``~`` on boolean Series, including reflected forms."""
    a = pl.Series([True, False, True])
    b = pl.Series([False, True, True])
    assert_series_equal((a & b), pl.Series([False, False, True]))
    assert_series_equal((a | b), pl.Series([True, True, True]))
    assert_series_equal((a ^ b), pl.Series([True, True, False]))
    assert_series_equal((~a), pl.Series([False, True, False]))

    # rand/rxor/ror are triggered here by putting a plain Python bool on the
    # left-hand side. Note that the type annotations only allow Series to be
    # passed in, but there is specific code to deal with non-Series inputs.
    assert_series_equal(
        (True & a),
        pl.Series([True, False, True]),
    )
    assert_series_equal(
        (True | a),
        pl.Series([True, True, True]),
    )
    assert_series_equal(
        (True ^ a),
        pl.Series([False, True, False]),
    )
305

306

307
def test_bitwise_floats_invert() -> None:
    """Bitwise inversion of a float Series must raise."""
    floats = pl.Series([2.0, 3.0, 0.0])

    with pytest.raises(InvalidOperationError):
        _ = ~floats
312

313

314
def test_equality() -> None:
    """Comparison operators between Series, and between a Series and a literal."""
    left = pl.Series("a", [1, 2])
    right = left  # same object, so every element compares equal

    eq_mask = left == right
    assert isinstance(eq_mask, pl.Series)
    assert eq_mask.sum() == 2

    # number of True values each comparison should produce
    for mask, true_count in (
        (left != right, 0),
        (left >= right, 2),
        (left <= right, 2),
        (left > right, 0),
        (left < right, 0),
    ):
        assert mask.sum() == true_count

    assert left.sum() == 3
    assert_series_equal(left, right)

    # comparison against a string literal
    names = pl.Series("name", ["ham", "foo", "bar"])
    assert_series_equal((names == "ham"), pl.Series("name", [True, False, False]))

    # comparison against a list literal
    lists = pl.Series("name", [[1], [1, 2], [2, 3]])
    assert_series_equal((lists == [1]), pl.Series("name", [True, False, False]))
334

335

336
def test_agg() -> None:
    """mean/min/max on a two-element integer series."""
    values = pl.Series("a", [1, 2])
    observed = (values.mean(), values.min(), values.max())
    assert observed[0] == 1.5
    assert observed[1] == 1
    assert observed[2] == 2
341

342

343
def test_date_agg() -> None:
    """min/max on a Date series return Python ``date`` objects."""
    earliest = date(2022, 8, 2)
    middle = date(2096, 8, 1)
    latest = date(9009, 9, 9)

    dates = pl.Series([earliest, middle, latest], dtype=pl.Date)

    assert dates.min() == earliest
    assert dates.max() == latest
354

355

356
@pytest.mark.parametrize(
    ("s", "min", "max"),
    [
        (pl.Series(["c", "b", "a"], dtype=pl.Categorical("lexical")), "a", "c"),
        (pl.Series([None, "a", "c", "b"], dtype=pl.Categorical("lexical")), "a", "c"),
        (pl.Series([], dtype=pl.Categorical("lexical")), None, None),
        (pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a"])), "c", "a"),
        (pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a", "d"])), "c", "a"),
    ],
)
def test_categorical_agg(s: pl.Series, min: str | None, max: str | None) -> None:
    """min/max on Categorical and Enum series match the parametrized expectations."""
    smallest = s.min()
    largest = s.max()
    assert smallest == min
    assert largest == max
369

370

371
def test_add_string() -> None:
    """String concatenation with a literal on either side of ``+``."""
    s = pl.Series(["hello", "weird"])

    # literal on the right-hand side
    result = s + " world"
    assert_series_equal(result, pl.Series(["hello world", "weird world"]))

    # literal on the left-hand side; the expected result is named "literal"
    result = "pfx:" + s
    assert_series_equal(result, pl.Series("literal", ["pfx:hello", "pfx:weird"]))
379

380

381
@pytest.mark.parametrize(
    ("data", "expected_dtype"),
    [
        (100, pl.Int64),
        (8.5, pl.Float64),
        ("서울특별시", pl.String),
        (date.today(), pl.Date),
        (datetime.now(), pl.Datetime("us")),
        (time(23, 59, 59), pl.Time),
        (timedelta(hours=7, seconds=123), pl.Duration("us")),
    ],
)
def test_unknown_dtype(data: Any, expected_dtype: PolarsDataType) -> None:
    """Passing ``dtype=Unknown`` must fall back to inferring the dtype from the data."""
    inferred = pl.Series([data], dtype=Unknown)
    assert inferred.dtype == expected_dtype
    # the value itself must round-trip unchanged
    assert inferred.to_list() == [data]
398

399

400
def test_various() -> None:
    """Grab-bag of basic Series operations: rename, append, head/tail, sort, gather."""
    original = pl.Series("a", [1, 2])
    assert original.is_null().sum() == 0
    assert original.name == "a"

    renamed = original.rename("b")
    assert renamed.name == "b"
    assert renamed.len() == 2
    assert len(renamed) == 2

    # append mutates the series it is called on
    renamed.append(renamed.clone())
    assert_series_equal(renamed, pl.Series("b", [1, 2, 1, 2]))

    twenty = pl.Series("a", range(20))
    first_five = twenty.head(5)
    last_five = twenty.tail(5)
    assert first_five.len() == 5
    assert last_five.len() == 5
    assert (first_five != last_five).all()

    unsorted = pl.Series("a", [2, 1, 4])
    unsorted.sort(in_place=True)
    assert_series_equal(unsorted, pl.Series("a", [1, 2, 4]))

    with_dupes = pl.Series("a", [2, 1, 1, 4, 4, 4])
    assert_series_equal(
        with_dupes.arg_unique(), pl.Series("a", [0, 1, 3], dtype=UInt32)
    )

    assert_series_equal(with_dupes.gather([2, 3]), pl.Series("a", [1, 4]))
425

426

427
def test_series_dtype_is() -> None:
    """dtype classification predicates across several dtypes."""
    # default integer inference -> signed integer
    signed = pl.Series("s", [1, 2, 3])
    signed_dtype = signed.dtype
    assert signed_dtype.is_numeric()
    assert signed_dtype.is_integer()
    assert signed_dtype.is_signed_integer()
    assert not signed_dtype.is_unsigned_integer()
    assert (signed * 0.99).dtype.is_float()

    # explicit unsigned integer
    unsigned_dtype = pl.Series("s", [1, 2, 3], dtype=pl.UInt8).dtype
    assert unsigned_dtype.is_numeric()
    assert unsigned_dtype.is_integer()
    assert not unsigned_dtype.is_signed_integer()
    assert unsigned_dtype.is_unsigned_integer()

    # boolean is not numeric
    flags = pl.Series("bool", [True, None, False])
    assert not flags.dtype.is_numeric()

    text = pl.Series("s", ["testing..."])
    assert text.dtype == pl.String
    assert text.dtype != pl.Boolean

    # decimal is numeric but not float
    decimals = pl.Series("s", [], dtype=pl.Decimal(20, 15))
    assert not decimals.dtype.is_float()
    assert decimals.dtype.is_numeric()
    assert decimals.is_empty()

    stamps = pl.Series("s", [], dtype=pl.Datetime("ms", time_zone="UTC"))
    assert stamps.dtype.is_temporal()
456

457

458
def test_series_head_tail_limit() -> None:
    """head/limit/tail with in-range, oversized and negative ``n``."""
    digits = pl.Series(range(10))

    assert_series_equal(digits.head(5), pl.Series(range(5)))
    assert_series_equal(digits.limit(5), digits.head(5))
    assert_series_equal(digits.tail(5), pl.Series(range(5, 10)))

    # check if it doesn't fail when out of bounds
    for clipped in (digits.head(100), digits.limit(100), digits.tail(100)):
        assert clipped.len() == 10

    # negative values
    assert_series_equal(digits.head(-7), pl.Series(range(3)))
    assert digits.head(-2).len() == 8
    assert_series_equal(digits.tail(-8), pl.Series(range(8, 10)))
    assert digits.head(-6).len() == 4

    # negative values out of bounds
    for emptied in (digits.head(-12), digits.limit(-12), digits.tail(-12)):
        assert emptied.len() == 0
480

481

482
def test_filter_ops() -> None:
    """Filtering 0..19 by each comparison against 1 keeps the expected row count."""
    values = pl.Series("a", range(20))
    mask_and_count = (
        (values > 1, 18),
        (values < 1, 1),
        (values <= 1, 2),
        (values >= 1, 19),
        (values == 1, 1),
        (values != 1, 19),
    )
    for mask, kept in mask_and_count:
        assert values.filter(mask).len() == kept
490

491

492
def test_cast() -> None:
    """Casting yields the requested dtype; a failed cast reports the offending value."""
    values = pl.Series("a", range(20))

    for target in (
        pl.Float32,
        pl.Float64,
        pl.Int32,
        pl.UInt32,
        pl.Datetime,
        pl.Date,
    ):
        assert values.cast(target).dtype == target

    # display failed values, GH#4706
    not_all_numeric = pl.Series(["1", "2", "3", "4", "foobar"])
    with pytest.raises(InvalidOperationError, match="foobar"):
        not_all_numeric.cast(int)
505

506

507
@pytest.mark.parametrize(
    "test_data",
    [
        [1, None, 2],
        ["abc", None, "xyz"],
        [None, datetime.now()],
        [[1, 2], [3, 4], None],
    ],
)
def test_to_pandas(test_data: list[Any]) -> None:
    """Round-trip a Series with exactly one null through ``to_pandas``.

    Checks the default conversion first, then the pyarrow-extension-array
    conversion when it is available.
    """
    a = pl.Series("s", test_data)
    b = a.to_pandas()

    assert a.name == b.name
    assert b.isnull().sum() == 1

    vals_b: list[Any]
    if a.dtype == pl.List:
        # list elements are converted with ``.tolist()`` before comparing
        vals_b = [(None if x is None else x.tolist()) for x in b]
    else:
        # map NaN back to None so the values compare equal to the input
        v = b.replace({np.nan: None}).values.tolist()
        vals_b = cast("list[Any]", v)

    assert vals_b == test_data

    # Only the conversion itself is guarded, so a ModuleNotFoundError cannot
    # hide a failing assertion below.
    try:
        c = a.to_pandas(use_pyarrow_extension_array=True)
    except ModuleNotFoundError:
        # Skip the rest if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.
        return

    assert a.name == c.name
    assert c.isnull().sum() == 1
    vals_c = [None if x is pd.NA else x for x in c.tolist()]
    assert vals_c == test_data
541

542

543
def test_series_to_list() -> None:
    """``to_list`` returns a Python list and represents nulls as ``None``."""
    as_list = pl.Series("a", range(20)).to_list()
    assert isinstance(as_list, list)
    assert len(as_list) == 20

    with_null = pl.Series("a", [1, None, 2])
    assert with_null.null_count() == 1
    assert with_null.to_list() == [1, None, 2]
552

553

554
@pytest.mark.may_fail_cloud  # reason: list.to_struct is an eager operation
def test_to_struct() -> None:
    """``list.to_struct`` field naming: default, callable, and explicit names."""
    digit_groups = pl.Series("nums", ["12 34", "56 78", "90 00"]).str.extract_all(
        r"\d+"
    )

    default_names = digit_groups.list.to_struct().struct.fields
    assert default_names == ["field_0", "field_1"]

    generated_names = digit_groups.list.to_struct(
        fields=lambda idx: f"n{idx:02}"
    ).struct.fields
    assert generated_names == ["n00", "n01"]

    unnested = digit_groups.list.to_struct(fields=["one", "two"]).struct.unnest()
    assert_frame_equal(
        unnested,
        pl.DataFrame({"one": ["12", "56", "90"], "two": ["34", "78", "00"]}),
    )
567

568

569
def test_to_struct_empty() -> None:
    """Unnesting a struct built with no fields gives a (0, 0) frame."""
    frame = pl.DataFrame({"y": [[], [], []]}, schema={"y": pl.List(pl.Int64)})
    fieldless = pl.col("y").list.to_struct(fields=[]).struct.unnest()
    result = frame.select(fieldless)
    assert result.shape == (0, 0)
573

574

575
def test_sort() -> None:
    """Ascending (default) and descending sort."""
    unsorted = pl.Series("a", [2, 1, 3])

    ascending = unsorted.sort()
    assert_series_equal(ascending, pl.Series("a", [1, 2, 3]))

    descending = unsorted.sort(descending=True)
    assert_series_equal(descending, pl.Series("a", [3, 2, 1]))
579

580

581
def test_rechunk() -> None:
    """Appending adds a chunk; rechunk merges them, in place or as a copy."""
    combined = pl.Series("a", [1, 2, 3])
    combined.append(pl.Series("b", [4, 5, 6]))
    assert combined.n_chunks() == 2

    # the out-of-place variant returns a single-chunk series
    merged_copy = combined.rechunk(in_place=False)
    assert merged_copy.n_chunks() == 1

    # the in-place variant updates the series itself
    combined.rechunk(in_place=True)
    assert combined.n_chunks() == 1
589

590

591
def test_indexing() -> None:
    """Integer indexing returns Python scalars, with ``None`` for nulls."""
    ints = pl.Series("a", [1, 2, None])
    assert ints[1] == 2
    assert ints[2] is None

    flags = pl.Series("b", [True, False])
    assert flags[0]
    assert not flags[1]

    strings = pl.Series("a", ["a", None])
    assert strings[0] == "a"
    assert strings[1] is None

    floats = pl.Series("a", [0.1, None])
    assert floats[0] == 0.1
    assert floats[1] is None
604

605

606
def test_arrow() -> None:
    """Conversions between Series and pyarrow arrays for several dtypes."""
    ints = pl.Series("a", [1, 2, 3, None])
    assert ints.to_arrow() == pa.array([1, 2, 3, None])

    floats = pl.Series("b", [1.0, 2.0, 3.0, None])
    assert floats.to_arrow() == pa.array([1.0, 2.0, 3.0, None])

    strings = pl.Series("c", ["A", "BB", "CCC", None])
    arrow_strings = strings.to_arrow()
    assert arrow_strings == pa.array(["A", "BB", "CCC", None], type=pa.large_string())
    # a bare arrow array comes back with an empty name
    assert_series_equal(pl.from_arrow(arrow_strings), strings.rename(""))  # type: ignore[arg-type]

    # a column taken from an arrow table keeps its name unless a schema is given
    arrow_column = strings.to_frame().to_arrow()["c"]
    assert isinstance(arrow_column, (pa.Array, pa.ChunkedArray))
    assert_series_equal(pl.from_arrow(arrow_column), strings)  # type: ignore[arg-type]
    assert_series_equal(
        pl.from_arrow(arrow_column, schema=["x"]),  # type: ignore[arg-type]
        strings.rename("x"),
    )

    nulls = pl.Series("d", [None, None, None], pl.Null)
    assert nulls.to_arrow() == pa.nulls(3)

    arrow_lists = pa.array([["foo"], ["foo", "bar"]], pa.list_(pa.utf8()))
    from_lists = cast("pl.Series", pl.from_arrow(arrow_lists))
    assert from_lists.dtype == pl.List
634

635

636
def test_arrow_cat() -> None:
    """Dictionary-encoded arrow arrays, including empty ones, become Categorical."""
    # categorical dtype tests (including various forms of empty pyarrow array)
    explicit_dict = pa.array(["foo", "bar"], pa.dictionary(pa.int32(), pa.utf8()))
    assert_series_equal(
        pl.Series("arr", ["foo", "bar"], pl.Categorical),
        pl.Series("arr", explicit_dict),
    )

    encoded = pa.array(["xxx", "xxx", None, "yyy"]).dictionary_encode()
    empty_chunked = pa.chunked_array([], encoded.type)
    empty_typed = pa.array([], encoded.type)
    empty_untyped = pa.array([]).dictionary_encode()

    assert_series_equal(
        pl.Series("arr", ["xxx", "xxx", None, "yyy"], dtype=pl.Categorical),
        pl.Series("arr", encoded),
    )

    # empty arrays that carry the dictionary type still map to Categorical
    for empty in (empty_chunked, empty_typed):
        assert_series_equal(
            pl.Series("arr", [], dtype=pl.Categorical), pl.Series("arr", empty)
        )

    # an empty array encoded without an explicit type maps to Null
    assert_series_equal(
        pl.Series("arr", [], dtype=pl.Null), pl.Series("arr", empty_untyped)
    )
656

657

658
def test_pycapsule_interface() -> None:
    """A Series wrapped in ``PyCapsuleStreamHolder`` is readable by pyarrow."""
    series = pl.Series("a", [1, 2, 3, None])
    chunked = pa.chunked_array(PyCapsuleStreamHolder(series))
    combined = chunked.combine_chunks()
    assert combined == pa.array([1, 2, 3, None])
663

664

665
def test_get() -> None:
    """``__getitem__`` with ints, slices, ranges, lists, Series and numpy arrays."""
    a = pl.Series("a", [1, 2, 3])
    pos_idxs = pl.Series("idxs", [2, 0, 1, 0], dtype=pl.Int8)
    neg_and_pos_idxs = pl.Series(
        "neg_and_pos_idxs", [-2, 1, 0, -1, 2, -3], dtype=pl.Int8
    )
    empty_idxs = pl.Series("idxs", [], dtype=pl.Int8)
    empty_ints: list[int] = []

    # scalar, slice, range and list keys
    assert a[0] == 1
    assert a[:2].to_list() == [1, 2]
    assert a[range(1)].to_list() == [1]
    assert a[range(0, 4, 2)].to_list() == [1, 3]
    assert a[:0].to_list() == []
    assert a[empty_ints].to_list() == []
    assert a[neg_and_pos_idxs.to_list()].to_list() == [2, 2, 1, 3, 3, 1]

    unsigned_dtypes = (pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)
    signed_dtypes = (pl.Int8, pl.Int16, pl.Int32, pl.Int64)

    # non-negative and empty index Series / numpy arrays of every integer width
    for dtype in unsigned_dtypes + signed_dtypes:
        positions = pos_idxs.cast(dtype)
        no_positions = empty_idxs.cast(dtype)
        assert a[positions].to_list() == [3, 1, 2, 1]
        assert a[positions.to_numpy()].to_list() == [3, 1, 2, 1]
        assert a[no_positions].to_list() == []
        assert a[no_positions.to_numpy()].to_list() == []

    # negative indices are only exercised with signed numpy arrays
    for dtype in signed_dtypes:
        picked = a[neg_and_pos_idxs.cast(dtype).to_numpy()]
        assert picked.to_list() == [2, 2, 1, 3, 3, 1]
698

699

700
def test_set() -> None:
    """Assigning through a boolean Series mask overwrites the masked positions."""
    target = pl.Series("a", [True, False, True])
    selector = pl.Series("msk", [True, False, True])
    target[selector] = False
    assert_series_equal(target, pl.Series("a", [False] * 3))
705

706

707
def test_set_value_as_list_fail() -> None:
    """Assigning a list of values works for integers and raises for str/bool."""
    # only allowed for numerical physical types
    numbers = pl.Series("a", [1, 2, 3])
    numbers[[0, 2]] = [4, 5]
    assert numbers.to_list() == [4, 2, 5]

    # for other types it is not allowed
    letters = pl.Series("a", ["a", "b", "c"])
    with pytest.raises(TypeError):
        letters[[0, 1]] = ["d", "e"]

    flags = pl.Series("a", [True, False, False])
    with pytest.raises(TypeError):
        flags[[0, 1]] = [True, False]
721

722

723
@pytest.mark.parametrize("key", [True, False, 1.0])
def test_set_invalid_key(key: Any) -> None:
    """Scalar bool and float keys are rejected by ``__setitem__``."""
    series = pl.Series("a", [1, 2, 3])
    with pytest.raises(TypeError):
        series[key] = 1
728

729

730
@pytest.mark.parametrize(
    "key",
    [
        pl.Series([False, True, True]),
        pl.Series([1, 2], dtype=UInt32),
        pl.Series([1, 2], dtype=UInt64),
    ],
)
def test_set_key_series(key: pl.Series) -> None:
    """Only UInt32/UInt64/bool are allowed."""
    target = pl.Series("a", [1, 2, 3])
    target[key] = 4
    # every parametrized key selects positions 1 and 2
    assert_series_equal(target, pl.Series("a", [1, 4, 4]))
743

744

745
def test_set_np_array_boolean_mask() -> None:
    """A numpy boolean array is accepted as an assignment mask."""
    target = pl.Series("a", [1, 2, 3])
    selector = np.array([True, False, True])
    target[selector] = 4
    assert_series_equal(target, pl.Series("a", [4, 2, 4]))
750

751

752
@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.uint32, np.uint64])
def test_set_np_array(dtype: Any) -> None:
    """A numpy integer index array of each parametrized dtype works for assignment."""
    target = pl.Series("a", [1, 2, 3])
    positions = np.array([0, 2], dtype=dtype)
    target[positions] = 4
    assert_series_equal(target, pl.Series("a", [4, 2, 4]))
758

759

760
@pytest.mark.parametrize("idx", [[0, 2], (0, 2)])
def test_set_list_and_tuple(idx: list[int] | tuple[int, ...]) -> None:
    """Both a list and a tuple of integer positions work as an assignment key."""
    a = pl.Series("a", [1, 2, 3])
    a[idx] = 4
    assert_series_equal(a, pl.Series("a", [4, 2, 4]))
765

766

767
def test_init_nested_tuple() -> None:
    """Tuples, flat or nested, are accepted as Series values."""
    flat = pl.Series("s", (1, 2, 3))
    assert flat.to_list() == [1, 2, 3]

    # one nested tuple -> one list row
    one_row_dtype = pl.List(pl.UInt8)
    one_row = pl.Series("s", ((1, 2, 3),), dtype=one_row_dtype)
    assert one_row.to_list() == [[1, 2, 3]]
    assert one_row.dtype == one_row_dtype

    # two nested tuples -> two list rows
    two_rows_dtype = pl.List(pl.Int32)
    two_rows = pl.Series("s", ((1, 2, 3), (1, 2, 3)), dtype=two_rows_dtype)
    assert two_rows.to_list() == [[1, 2, 3], [1, 2, 3]]
    assert two_rows.dtype == two_rows_dtype
778

779

780
def test_fill_null() -> None:
    """``fill_null`` with literal values and every strategy, on Series and DataFrame."""
    ints = pl.Series("a", [1, 2, None])
    assert_series_equal(ints.fill_null(strategy="forward"), pl.Series("a", [1, 2, 2]))
    assert_series_equal(ints.fill_null(14), pl.Series("a", [1, 2, 14], dtype=Int64))

    # float series: literal fill, each strategy, and the fill shortcuts
    floats = pl.Series("a", [0.0, 1.0, None, 2.0, None, 3.0])
    zero_filled = [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]
    forward_filled = [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]
    backward_filled = [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]

    assert floats.fill_null(0).to_list() == zero_filled
    assert floats.fill_null(strategy="zero").to_list() == zero_filled
    assert floats.fill_null(strategy="max").to_list() == [0.0, 1.0, 3.0, 2.0, 3.0, 3.0]
    assert floats.fill_null(strategy="min").to_list() == zero_filled
    assert floats.fill_null(strategy="one").to_list() == [0.0, 1.0, 1.0, 2.0, 1.0, 3.0]
    assert floats.fill_null(strategy="forward").to_list() == forward_filled
    assert floats.fill_null(strategy="backward").to_list() == backward_filled
    assert floats.fill_null(strategy="mean").to_list() == [0.0, 1.0, 1.5, 2.0, 1.5, 3.0]
    assert floats.forward_fill().to_list() == forward_filled
    assert floats.backward_fill().to_list() == backward_filled

    # string series: "zero" fills with the empty string
    strings = pl.Series("b", ["a", None, "c", None, "e"])
    assert strings.fill_null(strategy="min").to_list() == ["a", "a", "c", "a", "e"]
    assert strings.fill_null(strategy="max").to_list() == ["a", "e", "c", "e", "e"]
    assert strings.fill_null(strategy="zero").to_list() == ["a", "", "c", "", "e"]
    assert strings.fill_null(strategy="forward").to_list() == ["a", "a", "c", "c", "e"]
    assert strings.fill_null(strategy="backward").to_list() == ["a", "c", "c", "e", "e"]

    # binary series: "zero" fills with empty bytes
    blobs = pl.Series("c", [b"a", None, b"c", None, b"e"])
    assert blobs.fill_null(strategy="min").to_list() == [b"a", b"a", b"c", b"a", b"e"]
    assert blobs.fill_null(strategy="max").to_list() == [b"a", b"e", b"c", b"e", b"e"]
    assert blobs.fill_null(strategy="zero").to_list() == [b"a", b"", b"c", b"", b"e"]
    assert blobs.fill_null(strategy="forward").to_list() == [
        b"a",
        b"a",
        b"c",
        b"c",
        b"e",
    ]
    assert blobs.fill_null(strategy="backward").to_list() == [
        b"a",
        b"c",
        b"c",
        b"e",
        b"e",
    ]

    # DataFrame-level fill across mixed dtypes
    mixed = pl.DataFrame(
        [
            pl.Series("i32", [1, 2, None], dtype=pl.Int32),
            pl.Series("i64", [1, 2, None], dtype=pl.Int64),
            pl.Series("f32", [1, 2, None], dtype=pl.Float32),
            pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),
            pl.Series("str", ["a", "b", None], dtype=pl.String),
            pl.Series("bool", [True, True, None], dtype=pl.Boolean),
        ]
    )

    # without supertype matching the integer literal only fills the i64 column
    exact_match = (
        mixed.fill_null(0, matches_supertype=False).fill_null("bar").fill_null(False)
    )
    assert exact_match.to_dict(as_series=False) == {
        "i32": [1, 2, None],
        "i64": [1, 2, 0],
        "f32": [1.0, 2.0, None],
        "cat": ["a", "b", "bar"],
        "str": ["a", "b", "bar"],
        "bool": [True, True, False],
    }

    # with supertype matching the integer literal fills all numeric columns
    supertype_match = (
        mixed.fill_null(0, matches_supertype=True).fill_null("bar").fill_null(False)
    )
    assert supertype_match.to_dict(as_series=False) == {
        "i32": [1, 2, 0],
        "i64": [1, 2, 0],
        "f32": [1.0, 2.0, 0.0],
        "cat": ["a", "b", "bar"],
        "str": ["a", "b", "bar"],
        "bool": [True, True, False],
    }

    # unsigned columns are filled and keep their dtypes
    base = pl.DataFrame({"a": [1, None, 2, None]})
    out = base.with_columns(
        pl.col("a").cast(pl.UInt8).alias("u8"),
        pl.col("a").cast(pl.UInt16).alias("u16"),
        pl.col("a").cast(pl.UInt32).alias("u32"),
        pl.col("a").cast(pl.UInt64).alias("u64"),
    ).fill_null(3)

    filled_column = [1, 3, 2, 3]
    assert out.to_dict(as_series=False) == {
        "a": filled_column,
        "u8": filled_column,
        "u16": filled_column,
        "u32": filled_column,
        "u64": filled_column,
    }
    assert out.dtypes == [pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64]
861

862

863
def test_str_series_min_max_10674() -> None:
    """Check min/max of a String series with nulls, before and after sorting."""
    str_series = pl.Series("b", ["a", None, "c", None, "e"], dtype=pl.String)
    assert str_series.min() == "a"
    assert str_series.max() == "e"
    assert str_series.sort(descending=False).min() == "a"
    assert str_series.sort(descending=True).max() == "e"
869

870

871
def test_fill_nan() -> None:
    """Check that fill_nan replaces NaN values with null or with a given number."""
    nan = float("nan")
    a = pl.Series("a", [1.0, nan, 2.0, nan, 3.0])
    assert_series_equal(a.fill_nan(None), pl.Series("a", [1.0, None, 2.0, None, 3.0]))
    assert_series_equal(a.fill_nan(0), pl.Series("a", [1.0, 0.0, 2.0, 0.0, 3.0]))
876

877

878
def test_map_elements() -> None:
    """Exercise Series.map_elements on integer, string and temporal series."""
    # squaring an integer series is expected to emit the inefficiency warning
    with pytest.warns(PolarsInefficientMapWarning):
        a = pl.Series("a", [1, 2, None])
        b = a.map_elements(lambda x: x**2, return_dtype=pl.Int64)
        assert list(b) == [1, 4, None]

    # so is plain string concatenation
    with pytest.warns(PolarsInefficientMapWarning):
        a = pl.Series("a", ["foo", "bar", None])
        b = a.map_elements(lambda x: x + "py", return_dtype=pl.String)
        assert list(b) == ["foopy", "barpy", None]

    # with and without an explicit return dtype
    b = a.map_elements(lambda x: len(x), return_dtype=pl.Int32)
    assert list(b) == [3, 3, None]

    b = a.map_elements(lambda x: len(x))
    assert list(b) == [3, 3, None]

    # just check that it runs (somehow problem with conditional compilation)
    a = pl.Series("a", [2, 2, 3]).cast(pl.Datetime)
    a.map_elements(lambda x: x)
    a = pl.Series("a", [2, 2, 3]).cast(pl.Date)
    a.map_elements(lambda x: x)
900

901

902
def test_shape() -> None:
    """Check that a three-element series reports the shape ``(3,)``."""
    s = pl.Series([1, 2, 3])
    assert s.shape == (3,)
905

906

907
@pytest.mark.parametrize("arrow_available", [True, False])
def test_create_list_series(arrow_available: bool, monkeypatch: Any) -> None:
    """Build a nested-list series with the pyarrow-availability flag on and off."""
    monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", arrow_available)
    a = [[1, 2], None, [None, 3]]
    s = pl.Series("", a)
    assert s.to_list() == a
913

914

915
def test_iter() -> None:
    """Check that iterating a series yields its values in order."""
    s = pl.Series("", [1, 2, 3])

    itr = s.__iter__()
    assert itr.__next__() == 1
    assert itr.__next__() == 2
    assert itr.__next__() == 3
    # the builtin sum() consumes the series through the same iteration protocol
    assert sum(s) == 6
923

924

925
def test_empty() -> None:
    """Check dtype and emptiness of empty series built in several ways."""
    a = pl.Series(dtype=pl.Int8)
    assert a.dtype == pl.Int8
    assert a.is_empty()

    # no dtype and no values: the dtype is Null
    a = pl.Series()
    assert a.dtype == pl.Null
    assert a.is_empty()

    a = pl.Series("name", [])
    assert a.dtype == pl.Null
    assert a.is_empty()

    a = pl.Series(values=(), dtype=pl.Int8)
    assert a.dtype == pl.Int8
    assert a.is_empty()

    assert_series_equal(pl.Series(), pl.Series())
    assert_series_equal(
        pl.Series(dtype=pl.Int32), pl.Series(dtype=pl.Int64), check_dtypes=False
    )

    # truthiness of a series is rejected rather than guessed
    with pytest.raises(TypeError, match="ambiguous"):
        not pl.Series()
949

950

951
def test_round() -> None:
    """Check rounding a float series to two decimals and with default arguments."""
    a = pl.Series("f", [1.003, 2.003])
    b = a.round(2)
    assert b.to_list() == [1.00, 2.00]

    b = a.round()
    assert b.to_list() == [1.0, 2.0]
958

959

960
def test_round_int() -> None:
    """Check that rounding an integer series returns an equal series."""
    s = pl.Series([1, 2, 3])
    assert_series_equal(s, s.round())
963

964

965
@pytest.mark.parametrize(
    ("series", "digits", "expected_result"),
    [
        pytest.param(pl.Series([1.234, 0.1234]), 2, pl.Series([1.2, 0.12]), id="f64"),
        pytest.param(
            pl.Series([1.234, 0.1234]).cast(pl.Float32),
            2,
            pl.Series([1.2, 0.12]).cast(pl.Float32),
            id="f32",
        ),
        pytest.param(pl.Series([123400, 1234]), 2, pl.Series([120000, 1200]), id="i64"),
        pytest.param(
            pl.Series([123400, 1234]).cast(pl.Int32),
            2,
            pl.Series([120000, 1200]).cast(pl.Int32),
            id="i32",
        ),
        pytest.param(
            pl.Series([0.0]), 2, pl.Series([0.0]), id="0 should remain the same"
        ),
    ],
)
def test_round_sig_figs(
    series: pl.Series, digits: int, expected_result: pl.Series
) -> None:
    """Check round_sig_figs against expected output for each parametrized case."""
    result = series.round_sig_figs(digits=digits)
    assert_series_equal(result, expected_result)
992

993

994
def test_round_sig_figs_raises_exc() -> None:
    """Check that round_sig_figs with ``digits=0`` raises InvalidOperationError."""
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.Series([1.234, 0.1234]).round_sig_figs(digits=0)
997

998

999
def test_apply_list_out() -> None:
    """Check map_elements when the mapped function returns a Series per element."""
    s = pl.Series("count", [3, 2, 2])
    # each value `val` is expanded into `val` repetitions of itself
    out = s.map_elements(lambda val: pl.repeat(val, val, eager=True))
    assert out[0].to_list() == [3, 3, 3]
    assert out[1].to_list() == [2, 2]
    assert out[2].to_list() == [2, 2]
1005

1006

1007
def test_reinterpret() -> None:
    """Check that reinterpreting UInt64 as signed yields Int64 (Series and Expr)."""
    s = pl.Series("a", [1, 1, 2], dtype=pl.UInt64)
    assert s.reinterpret(signed=True).dtype == pl.Int64
    df = pl.DataFrame([s])
    assert df.select([pl.col("a").reinterpret(signed=True)])["a"].dtype == pl.Int64
1012

1013

1014
def test_mode() -> None:
    """Check mode() on integer, categorical, float and string series."""
    s = pl.Series("a", [1, 1, 2])
    assert s.mode().to_list() == [1]
    assert s.set_sorted().mode().to_list() == [1]

    df = pl.DataFrame([s])
    assert df.select([pl.col("a").mode()])["a"].to_list() == [1]
    assert (
        pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().item()
        == "bar"
    )
    assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.0
    assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b"

    # sorted data
    # (every value occurs once; compared as a set so result order is ignored)
    assert set(pl.int_range(0, 3, eager=True).mode().to_list()) == {0, 1, 2}
1030

1031

1032
def test_diff() -> None:
    """Check diff() with the default null handling and with nulls dropped."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    assert_series_equal(
        s.diff(),
        pl.Series("a", [None, 1, 1, -1, 0, 1, -3]),
    )
    assert_series_equal(
        s.diff(null_behavior="drop"),
        pl.Series("a", [1, 1, -1, 0, 1, -3]),
    )
1043

1044

1045
def test_diff_negative() -> None:
    """Check diff() with a negative shift, with and without dropping nulls."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    assert_series_equal(
        s.diff(-1),
        pl.Series("a", [-1, -1, 1, 0, -1, 3, None]),
    )
    assert_series_equal(
        s.diff(-1, null_behavior="drop"),
        pl.Series("a", [-1, -1, 1, 0, -1, 3]),
    )
1056

1057

1058
def test_pct_change() -> None:
    """Check pct_change() with an int period, a Series period and a negative one."""
    s = pl.Series("a", [1, 2, 4, 8, 16, 32, 64])
    expected = pl.Series("a", [None, None, 3.0, 3.0, 3.0, 3.0, 3.0])
    assert_series_equal(s.pct_change(2), expected)
    assert_series_equal(s.pct_change(pl.Series([2])), expected)
    # negative
    assert pl.Series(range(5)).pct_change(-1).to_list() == [
        -1.0,
        -0.5,
        -0.3333333333333333,
        -0.25,
        None,
    ]
1071

1072

1073
def test_skew() -> None:
    """Check biased and unbiased skew on a Series and through an expression."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    assert s.skew(bias=True) == pytest.approx(-0.5953924651018018)
    assert s.skew(bias=False) == pytest.approx(-0.7717168360221258)

    df = pl.DataFrame([s])
    assert np.isclose(
        df.select(pl.col("a").skew(bias=False))["a"][0], -0.7717168360221258
    )
1083

1084

1085
def test_kurtosis() -> None:
    """Check kurtosis on a Series and through an expression."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    expected = -0.6406250000000004

    assert s.kurtosis() == pytest.approx(expected)
    df = pl.DataFrame([s])
    assert np.isclose(df.select(pl.col("a").kurtosis())["a"][0], expected)
1092

1093

1094
def test_sqrt() -> None:
    """Check sqrt() on a Series and through an expression against numpy."""
    s = pl.Series("a", [1, 2])
    assert_series_equal(s.sqrt(), pl.Series("a", [1.0, np.sqrt(2)]))
    df = pl.DataFrame([s])
    assert_series_equal(
        df.select(pl.col("a").sqrt())["a"], pl.Series("a", [1.0, np.sqrt(2)])
    )
1101

1102

1103
def test_cbrt() -> None:
    """Check cbrt() on a Series and through an expression against numpy."""
    s = pl.Series("a", [1, 2])
    assert_series_equal(s.cbrt(), pl.Series("a", [1.0, np.cbrt(2)]))
    df = pl.DataFrame([s])
    assert_series_equal(
        df.select(pl.col("a").cbrt())["a"], pl.Series("a", [1.0, np.cbrt(2)])
    )
1110

1111

1112
def test_range() -> None:
    """Check `range` objects as an index and as series values."""
    s1 = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    # indexing with a range matches the equivalent slice
    assert_series_equal(s1[2:5], s1[range(2, 5)])

    ranges = [range(-2, 1), range(3), range(2, 8, 2)]

    # a list of ranges becomes a List series with the requested inner dtype
    s2 = pl.Series("b", ranges, dtype=pl.List(pl.Int8))
    assert s2.to_list() == [[-2, -1, 0], [0, 1, 2], [2, 4, 6]]
    assert s2.dtype == pl.List(pl.Int8)
    assert s2.name == "b"

    # a generator of lists of ranges becomes a doubly nested List series
    s3 = pl.Series("c", (ranges for _ in range(3)))
    assert s3.to_list() == [
        [[-2, -1, 0], [0, 1, 2], [2, 4, 6]],
        [[-2, -1, 0], [0, 1, 2], [2, 4, 6]],
        [[-2, -1, 0], [0, 1, 2], [2, 4, 6]],
    ]
    assert s3.dtype == pl.List(pl.List(pl.Int64))

    df = pl.DataFrame([s1])
    assert_frame_equal(df[2:5], df[range(2, 5)])
1133

1134

1135
def test_strict_cast() -> None:
    """Check that a strict cast of ``2**16`` to Int16 raises (Series and Expr)."""
    with pytest.raises(InvalidOperationError):
        pl.Series("a", [2**16]).cast(dtype=pl.Int16, strict=True)
    with pytest.raises(InvalidOperationError):
        pl.DataFrame({"a": [2**16]}).select([pl.col("a").cast(pl.Int16, strict=True)])
1140

1141

1142
def test_floor_divide() -> None:
    """Check floor division by an integer on a Series and through an expression."""
    s = pl.Series("a", [1, 2, 3])
    assert_series_equal(s // 2, pl.Series("a", [0, 1, 1]))
    assert_series_equal(
        pl.DataFrame([s]).select(pl.col("a") // 2)["a"], pl.Series("a", [0, 1, 1])
    )
1148

1149

1150
def test_true_divide() -> None:
    """Check true division, reflected division, and division of large integers."""
    s = pl.Series("a", [1, 2])
    assert_series_equal(s / 2, pl.Series("a", [0.5, 1.0]))
    assert_series_equal(
        pl.DataFrame([s]).select(pl.col("a") / 2)["a"], pl.Series("a", [0.5, 1.0])
    )

    # rtruediv
    assert_series_equal(
        pl.DataFrame([s]).select(2 / pl.col("a"))["literal"],
        pl.Series("literal", [2.0, 1.0]),
    )

    # https://github.com/pola-rs/polars/issues/1369
    vals = [3000000000, 2, 3]
    foo = pl.Series(vals)
    assert_series_equal(foo / 1, pl.Series(vals, dtype=Float64))
    assert_series_equal(
        pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"],
        pl.Series("a", vals, dtype=Float64),
    )
1171

1172

1173
def test_bitwise() -> None:
    """Check the ``&``, ``|`` and ``^`` operators on Series and expressions."""
    a = pl.Series("a", [1, 2, 3])
    b = pl.Series("b", [3, 4, 5])
    assert_series_equal(a & b, pl.Series("a", [1, 0, 1]))
    assert_series_equal(a | b, pl.Series("a", [3, 6, 7]))
    assert_series_equal(a ^ b, pl.Series("a", [2, 6, 6]))

    df = pl.DataFrame([a, b])
    out = df.select(
        (pl.col("a") & pl.col("b")).alias("and"),
        (pl.col("a") | pl.col("b")).alias("or"),
        (pl.col("a") ^ pl.col("b")).alias("xor"),
    )
    assert_series_equal(out["and"], pl.Series("and", [1, 0, 1]))
    assert_series_equal(out["or"], pl.Series("or", [3, 6, 7]))
    assert_series_equal(out["xor"], pl.Series("xor", [2, 6, 6]))

    # ensure mistaken use of logical 'and'/'or' raises an exception
    with pytest.raises(TypeError, match="ambiguous"):
        a and b  # type: ignore[redundant-expr]

    with pytest.raises(TypeError, match="ambiguous"):
        a or b  # type: ignore[redundant-expr]
1196

1197

1198
def test_from_generator_or_iterable() -> None:
    """Check Series construction from generators and custom iterable objects."""

    # generator function
    def gen(n: int) -> Iterator[int]:
        yield from range(n)

    # iterable object
    class Data:
        def __init__(self, n: int) -> None:
            self._n = n

        def __iter__(self) -> Iterator[int]:
            yield from gen(self._n)

    expected = pl.Series("s", range(10))
    assert expected.dtype == pl.Int64

    for generated_series in (
        pl.Series("s", values=gen(10)),
        pl.Series("s", values=Data(10)),
        pl.Series("s", values=(x for x in gen(10))),
    ):
        assert_series_equal(expected, generated_series)

    # test 'iterable_to_pyseries' directly to validate 'chunk_size' behaviour
    ps1 = iterable_to_pyseries("s", gen(10), dtype=pl.UInt8)
    ps2 = iterable_to_pyseries("s", gen(10), dtype=pl.UInt8, chunk_size=3)
    ps3 = iterable_to_pyseries("s", Data(10), dtype=pl.UInt8, chunk_size=6)

    expected = pl.Series("s", range(10), dtype=pl.UInt8)
    assert expected.dtype == pl.UInt8

    for ps in (ps1, ps2, ps3):
        # wrap the low-level result in a Series so it can be compared
        generated_series = pl.Series("s")
        generated_series._s = ps
        assert_series_equal(expected, generated_series)

    # empty generator
    assert_series_equal(pl.Series("s", []), pl.Series("s", values=gen(0)))
1236

1237

1238
def test_from_sequences(monkeypatch: Any) -> None:
    """Check nested-list construction agrees with the pyarrow flag on and off."""
    # test int, str, bool, flt
    values = [
        [[1], [None, 3]],
        [["foo"], [None, "bar"]],
        [[True], [None, False]],
        [[1.0], [None, 3.0]],
    ]

    for vals in values:
        monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", False)
        a = pl.Series("a", vals)
        monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", True)
        b = pl.Series("a", vals)
        assert_series_equal(a, b)
        assert a.to_list() == vals
1254

1255

1256
def test_comparisons_int_series_to_float() -> None:
    """Check arithmetic and comparisons between an int series and float scalars."""
    srs_int = pl.Series([1, 2, 3, 4])

    assert_series_equal(srs_int - 1.0, pl.Series([0.0, 1.0, 2.0, 3.0]))
    assert_series_equal(srs_int + 1.0, pl.Series([2.0, 3.0, 4.0, 5.0]))
    assert_series_equal(srs_int * 2.0, pl.Series([2.0, 4.0, 6.0, 8.0]))
    assert_series_equal(srs_int / 2.0, pl.Series([0.5, 1.0, 1.5, 2.0]))
    assert_series_equal(srs_int % 2.0, pl.Series([1.0, 0.0, 1.0, 0.0]))
    assert_series_equal(4.0 % srs_int, pl.Series([0.0, 0.0, 1.0, 0.0]))

    assert_series_equal(srs_int // 2.0, pl.Series([0.0, 1.0, 1.0, 2.0]))
    assert_series_equal(srs_int < 3.0, pl.Series([True, True, False, False]))
    assert_series_equal(srs_int <= 3.0, pl.Series([True, True, True, False]))
    assert_series_equal(srs_int > 3.0, pl.Series([False, False, False, True]))
    assert_series_equal(srs_int >= 3.0, pl.Series([False, False, True, True]))
    assert_series_equal(srs_int == 3.0, pl.Series([False, False, True, False]))
    # a bool scalar acts as the integer 1 here and the result stays integral
    assert_series_equal(srs_int - True, pl.Series([0, 1, 2, 3]))
1273

1274

1275
def test_comparisons_int_series_to_float_scalar() -> None:
    """Check ``<`` and ``>`` between an int series and a fractional float scalar."""
    srs_int = pl.Series([1, 2, 3, 4])

    assert_series_equal(srs_int < 1.5, pl.Series([True, False, False, False]))
    assert_series_equal(srs_int > 1.5, pl.Series([False, True, True, True]))
1280

1281

1282
def test_comparisons_datetime_series_to_date_scalar() -> None:
    """Check ``<`` and ``>`` between a series of dates and a datetime scalar."""
    srs_date = pl.Series([date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 3)])
    # midday on the first date: after the first element, before the other two
    dt = datetime(2023, 1, 1, 12, 0, 0)

    assert_series_equal(srs_date < dt, pl.Series([True, False, False]))
    assert_series_equal(srs_date > dt, pl.Series([False, True, True]))
1288

1289

1290
def test_comparisons_float_series_to_int() -> None:
    """Check arithmetic and comparisons between a float series and int scalars."""
    srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])

    assert_series_equal(srs_float - 1, pl.Series([0.0, 1.0, 2.0, 3.0]))
    assert_series_equal(srs_float + 1, pl.Series([2.0, 3.0, 4.0, 5.0]))
    assert_series_equal(srs_float * 2, pl.Series([2.0, 4.0, 6.0, 8.0]))
    assert_series_equal(srs_float / 2, pl.Series([0.5, 1.0, 1.5, 2.0]))
    assert_series_equal(srs_float % 2, pl.Series([1.0, 0.0, 1.0, 0.0]))
    assert_series_equal(4 % srs_float, pl.Series([0.0, 0.0, 1.0, 0.0]))

    assert_series_equal(srs_float // 2, pl.Series([0.0, 1.0, 1.0, 2.0]))
    assert_series_equal(srs_float < 3, pl.Series([True, True, False, False]))
    assert_series_equal(srs_float <= 3, pl.Series([True, True, True, False]))
    assert_series_equal(srs_float > 3, pl.Series([False, False, False, True]))
    assert_series_equal(srs_float >= 3, pl.Series([False, False, True, True]))
    assert_series_equal(srs_float == 3, pl.Series([False, False, True, False]))
    # a bool scalar acts as the number 1 here and the result stays float
    assert_series_equal(srs_float - True, pl.Series([0.0, 1.0, 2.0, 3.0]))
1307

1308

1309
def test_comparisons_bool_series_to_int() -> None:
    """Check which operators a Boolean series accepts against scalar operands."""
    srs_bool = pl.Series([True, False])

    # (native bool comparison should work...)
    for t, f in ((True, False), (False, True)):
        assert list(srs_bool == t) == list(srs_bool != f) == [t, f]

    # TODO: do we want this to work?
    assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))

    # the remaining arithmetic operators all fail with the same message
    match = (
        r"cannot do arithmetic with Series of dtype: Boolean"
        r" and argument of type: 'bool'"
    )
    with pytest.raises(TypeError, match=match):
        srs_bool - 1
    with pytest.raises(TypeError, match=match):
        srs_bool + 1
    with pytest.raises(TypeError, match=match):
        srs_bool % 2
    with pytest.raises(TypeError, match=match):
        srs_bool * 1

    from operator import ge, gt, le, lt

    for op in (ge, gt, le, lt):
        for scalar in (0, 1.0, True, False):
            # maps operator names: ge -> gt_eq, le -> lt_eq, gt/lt unchanged
            op_str = op.__name__.replace("e", "t_eq")
            with pytest.raises(
                NotImplementedError,
                match=rf"Series of type Boolean does not have {op_str} operator",
            ):
                op(srs_bool, scalar)
1345

1346

1347
@pytest.mark.parametrize(
    ("values", "compare_with", "compares_equal"),
    [
        (
            [date(1999, 12, 31), date(2021, 1, 31)],
            date(2021, 1, 31),
            [False, True],
        ),
        (
            [datetime(2021, 1, 1, 12, 0, 0), datetime(2021, 1, 2, 12, 0, 0)],
            datetime(2021, 1, 1, 12, 0, 0),
            [True, False],
        ),
        (
            [timedelta(days=1), timedelta(days=2)],
            timedelta(days=1),
            [True, False],
        ),
    ],
)
def test_temporal_comparison(
    values: list[Any], compare_with: Any, compares_equal: list[bool]
) -> None:
    """Check ``==`` between a temporal series and a matching Python scalar."""
    assert_series_equal(
        pl.Series(values) == compare_with,
        pl.Series(compares_equal, dtype=pl.Boolean),
    )
1374

1375

1376
def test_to_dummies() -> None:
    """Check that to_dummies yields one UInt8 indicator column per value."""
    s = pl.Series("a", [1, 2, 3])
    result = s.to_dummies()
    expected = pl.DataFrame(
        {"a_1": [1, 0, 0], "a_2": [0, 1, 0], "a_3": [0, 0, 1]},
        schema={"a_1": pl.UInt8, "a_2": pl.UInt8, "a_3": pl.UInt8},
    )
    assert_frame_equal(result, expected)
1384

1385

1386
def test_to_dummies_drop_first() -> None:
    """Check that ``drop_first=True`` omits the first value's indicator column."""
    s = pl.Series("a", [1, 2, 3])
    result = s.to_dummies(drop_first=True)
    expected = pl.DataFrame(
        {"a_2": [0, 1, 0], "a_3": [0, 0, 1]},
        schema={"a_2": pl.UInt8, "a_3": pl.UInt8},
    )
    assert_frame_equal(result, expected)
1394

1395

1396
def test_to_dummies_drop_nulls() -> None:
    """Check that ``drop_nulls=True`` produces no indicator column for null."""
    s = pl.Series("a", [1, 2, None])
    result = s.to_dummies(drop_nulls=True)
    expected = pl.DataFrame(
        {"a_1": [1, 0, 0], "a_2": [0, 1, 0]},
        schema={"a_1": pl.UInt8, "a_2": pl.UInt8},
    )
    assert_frame_equal(result, expected)
1404

1405

1406
def test_to_dummies_null_clash_19096() -> None:
    """Check that a null plus the string "null" raises a DuplicateError."""
    with pytest.raises(
        DuplicateError, match="column with name '_null' has more than one occurrence"
    ):
        pl.Series([None, "null"]).to_dummies()
1411

1412

1413
def test_chunk_lengths() -> None:
    """Check n_chunks() and chunk_lengths() on a freshly built series."""
    s = pl.Series("a", [1, 2, 2, 3])
    # this is a Series with one chunk, of length 4
    assert s.n_chunks() == 1
    assert s.chunk_lengths() == [4]
1418

1419

1420
def test_limit() -> None:
    """Check that limit(2) keeps the first two elements."""
    s = pl.Series("a", [1, 2, 3])
    assert_series_equal(s.limit(2), pl.Series("a", [1, 2]))
1423

1424

1425
def test_filter() -> None:
    """Check filter() with Series, list and numpy masks, and a non-boolean mask."""
    s = pl.Series("a", [1, 2, 3])
    mask = pl.Series("", [True, False, True])

    assert_series_equal(s.filter(mask), pl.Series("a", [1, 3]))
    assert_series_equal(s.filter([True, False, True]), pl.Series("a", [1, 3]))
    assert_series_equal(s.filter(np.array([True, False, True])), pl.Series("a", [1, 3]))

    # an integer array is rejected as a mask
    with pytest.raises(RuntimeError, match="Expected a boolean mask"):
        s.filter(np.array([1, 0, 1]))
1435

1436

1437
def test_gather_every() -> None:
    """Check gather_every(2) with and without an offset."""
    s = pl.Series("a", [1, 2, 3, 4])
    assert_series_equal(s.gather_every(2), pl.Series("a", [1, 3]))
    assert_series_equal(s.gather_every(2, offset=1), pl.Series("a", [2, 4]))
1441

1442

1443
def test_arg_sort() -> None:
    """Check arg_sort() indices in ascending and descending order."""
    s = pl.Series("a", [5, 3, 4, 1, 2])
    expected = pl.Series("a", [3, 4, 1, 2, 0], dtype=UInt32)

    assert_series_equal(s.arg_sort(), expected)

    expected_descending = pl.Series("a", [0, 2, 1, 4, 3], dtype=UInt32)
    assert_series_equal(s.arg_sort(descending=True), expected_descending)
1451

1452

1453
@pytest.mark.parametrize(
    ("series", "argmin", "argmax"),
    [
        # Numeric
        (pl.Series([5, 3, 4, 1, 2]), 3, 0),
        (pl.Series([None, 5, 1]), 2, 1),
        # Boolean
        (pl.Series([True, False]), 1, 0),
        (pl.Series([True, True]), 0, 0),
        (pl.Series([False, False]), 0, 0),
        (pl.Series([None, True, False, True]), 2, 1),
        (pl.Series([None, True, True]), 1, 1),
        (pl.Series([None, False, False]), 1, 1),
        # String
        (pl.Series(["a", "c", "b"]), 0, 1),
        (pl.Series([None, "a", None, "b"]), 1, 3),
        # Categorical
        (pl.Series(["c", "b", "a"], dtype=pl.Categorical(ordering="lexical")), 2, 0),
        (pl.Series("s", [None, "c", "b", None, "a"], pl.Categorical("lexical")), 4, 1),
    ],
)
def test_arg_min_arg_max(series: pl.Series, argmin: int, argmax: int) -> None:
    """Check arg_min()/arg_max() against the expected index for each case."""
    assert series.arg_min() == argmin, (
        f"values: {series.to_list()}, expected {argmin} got {series.arg_min()}"
    )
    assert series.arg_max() == argmax, (
        f"values: {series.to_list()}, expected {argmax} got {series.arg_max()}"
    )
1481

1482

1483
@pytest.mark.parametrize(
    ("series"),
    [
        # All nulls
        pl.Series([None, None], dtype=pl.Int32),
        pl.Series([None, None], dtype=pl.Boolean),
        pl.Series([None, None], dtype=pl.String),
        pl.Series([None, None], dtype=pl.Categorical),
        pl.Series([None, None], dtype=pl.Categorical(ordering="lexical")),
        # Empty Series
        pl.Series([], dtype=pl.Int32),
        pl.Series([], dtype=pl.Boolean),
        pl.Series([], dtype=pl.String),
        pl.Series([], dtype=pl.Categorical),
    ],
)
def test_arg_min_arg_max_all_nulls_or_empty(series: pl.Series) -> None:
    """Check arg_min()/arg_max() return None for all-null and empty series."""
    assert series.arg_min() is None
    assert series.arg_max() is None
1502

1503

1504
def test_arg_min_and_arg_max_sorted() -> None:
    """Check arg_min()/arg_max() on series whose sorted flags are set."""
    # test ascending and descending numerical series
    s = pl.Series([None, 1, 2, 3, 4, 5])
    s.sort(in_place=True)  # set ascending sorted flag
    assert s.flags == {"SORTED_ASC": True, "SORTED_DESC": False}
    assert s.arg_min() == 1
    assert s.arg_max() == 5
    s = pl.Series([None, 5, 4, 3, 2, 1])
    s.sort(descending=True, in_place=True)  # set descending sorted flag
    assert s.flags == {"SORTED_ASC": False, "SORTED_DESC": True}
    assert s.arg_min() == 5
    assert s.arg_max() == 1

    # test ascending and descending str series
    s = pl.Series([None, "a", "b", "c", "d", "e"])
    s.sort(in_place=True)  # set ascending sorted flag
    assert s.flags == {"SORTED_ASC": True, "SORTED_DESC": False}
    assert s.arg_min() == 1
    assert s.arg_max() == 5
    s = pl.Series([None, "e", "d", "c", "b", "a"])
    s.sort(descending=True, in_place=True)  # set descending sorted flag
    assert s.flags == {"SORTED_ASC": False, "SORTED_DESC": True}
    assert s.arg_min() == 5
    assert s.arg_max() == 1
1528

1529

1530
def test_is_null_is_not_null() -> None:
    """Check is_null() and is_not_null() on a float series with one null."""
    s = pl.Series("a", [1.0, 2.0, 3.0, None])
    assert_series_equal(s.is_null(), pl.Series("a", [False, False, False, True]))
    assert_series_equal(s.is_not_null(), pl.Series("a", [True, True, True, False]))
1534

1535

1536
def test_is_finite_is_infinite() -> None:
    """Check is_finite() and is_infinite() on a series containing infinity."""
    s = pl.Series("a", [1.0, 2.0, np.inf])
    assert_series_equal(s.is_finite(), pl.Series("a", [True, True, False]))
    assert_series_equal(s.is_infinite(), pl.Series("a", [False, False, True]))
1540

1541

1542
@pytest.mark.parametrize("float_type", [pl.Float32, pl.Float64])
def test_is_nan_is_not_nan(float_type: PolarsDataType) -> None:
    """Check the NaN helpers on a float series holding both a NaN and a null."""
    s = pl.Series([1.0, np.nan, None], dtype=float_type)

    # nulls stay null in the boolean results
    assert_series_equal(s.is_nan(), pl.Series([False, True, None]))
    assert_series_equal(s.is_not_nan(), pl.Series([True, False, None]))
    # only the NaN is filled or dropped; the null is kept
    assert_series_equal(s.fill_nan(2.0), pl.Series([1.0, 2.0, None], dtype=float_type))
    assert_series_equal(s.drop_nans(), pl.Series([1.0, None], dtype=float_type))
1550

1551

1552
def test_float_methods_on_ints() -> None:
    """Check the finite/infinite/NaN predicates on an Int32 series with a null."""
    # these float-specific methods work on non-float numeric types
    s = pl.Series([1, None], dtype=pl.Int32)
    assert_series_equal(s.is_finite(), pl.Series([True, None]))
    assert_series_equal(s.is_infinite(), pl.Series([False, None]))
    assert_series_equal(s.is_nan(), pl.Series([False, None]))
    assert_series_equal(s.is_not_nan(), pl.Series([True, None]))
1559

1560

1561
def test_dot() -> None:
    """Check dot() and the ``@`` operator with Series, list and numpy operands."""
    s1 = pl.Series("a", [1, 2, 3])
    s2 = pl.Series("b", [4.0, 5.0, 6.0])

    # reference value computed with numpy alone
    assert np.array([1, 2, 3]) @ np.array([4, 5, 6]) == 32

    for dot_result in (
        s1.dot(s2),
        s1 @ s2,
        [1, 2, 3] @ s2,
        s1 @ np.array([4, 5, 6]),
    ):
        assert dot_result == 32

    # operands of different lengths are rejected
    with pytest.raises(ShapeError, match="length mismatch"):
        s1 @ [4, 5, 6, 7, 8]
1577

1578

1579
@pytest.mark.parametrize(
    ("dtype"),
    [pl.Int8, pl.Int16, pl.Int32, pl.Float32, pl.Float64],
)
def test_peak_max_peak_min(dtype: pl.DataType) -> None:
    """Check peak_min() and peak_max() masks for several numeric dtypes."""
    s = pl.Series("a", [4, 1, 3, 2, 5], dtype=dtype)

    result = s.peak_min()
    expected = pl.Series("a", [False, True, False, True, False])
    assert_series_equal(result, expected)

    result = s.peak_max()
    expected = pl.Series("a", [True, False, True, False, True])
    assert_series_equal(result, expected)
1593

1594

1595
def test_peak_max_peak_min_bool() -> None:
    """Check peak_min() and peak_max() masks on a Boolean series."""
    s = pl.Series("a", [False, True, False, True, True, False], dtype=pl.Boolean)
    result = s.peak_min()
    expected = pl.Series("a", [False, False, True, False, False, False])
    assert_series_equal(result, expected)

    result = s.peak_max()
    expected = pl.Series("a", [False, True, False, False, False, False])
    assert_series_equal(result, expected)
1604

1605

1606
def test_shrink_to_fit() -> None:
    """Check object identity of shrink_to_fit() results for both in_place modes."""
    # in place: the same object is returned
    s = pl.Series("a", [4, 1, 3, 2, 5])
    sf = s.shrink_to_fit(in_place=True)
    assert sf is s

    # not in place: a different object is returned
    s = pl.Series("a", [4, 1, 3, 2, 5])
    sf = s.shrink_to_fit(in_place=False)
    assert s is not sf
1614

1615

1616
@pytest.mark.parametrize("unit", ["ns", "us", "ms"])
def test_cast_datetime_to_time(unit: TimeUnit) -> None:
    """Check casting a Datetime series to Time for each time unit."""
    a = pl.Series(
        "a",
        [
            datetime(2022, 9, 7, 0, 0),
            datetime(2022, 9, 6, 12, 0),
            datetime(2022, 9, 7, 23, 59, 59),
            datetime(2022, 9, 7, 23, 59, 59, 201),
        ],
        dtype=Datetime(unit),
    )
    if unit == "ms":
        # NOTE: microseconds are lost for `unit=ms`
        expected_values = [time(0, 0), time(12, 0), time(23, 59, 59), time(23, 59, 59)]
    else:
        expected_values = [
            time(0, 0),
            time(12, 0),
            time(23, 59, 59),
            time(23, 59, 59, 201),
        ]
    expected = pl.Series("a", expected_values)
    assert_series_equal(a.cast(Time), expected)
1640

1641

1642
def test_init_categorical() -> None:
    """Check Categorical construction matches casting an equal String series."""
    for values in [[None], ["foo", "bar"], [None, "foo", "bar"]]:
        expected = pl.Series("a", values, dtype=pl.String).cast(pl.Categorical)
        a = pl.Series("a", values, dtype=pl.Categorical)
        assert_series_equal(a, expected)
1647

1648

1649
def test_iter_nested_list() -> None:
    """Check forward and reversed iteration of a List series yields Series."""
    elems = list(pl.Series("s", [[1, 2], [3, 4]]))
    assert_series_equal(elems[0], pl.Series([1, 2]))
    assert_series_equal(elems[1], pl.Series([3, 4]))

    rev_elems = list(reversed(pl.Series("s", [[1, 2], [3, 4]])))
    assert_series_equal(rev_elems[0], pl.Series([3, 4]))
    assert_series_equal(rev_elems[1], pl.Series([1, 2]))
1657

1658

1659
def test_iter_nested_struct() -> None:
    """Check forward and reversed iteration of a Struct series yields dicts."""
    # note: this feels inconsistent with the above test for nested list, but
    # let's ensure the behaviour is codified before potentially modifying...
    elems = list(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}]))
    assert elems[0] == {"a": 1, "b": 2}
    assert elems[1] == {"a": 3, "b": 4}

    rev_elems = list(reversed(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}])))
    assert rev_elems[0] == {"a": 3, "b": 4}
    assert rev_elems[1] == {"a": 1, "b": 2}
1669

1670

1671
@pytest.mark.parametrize(
    "dtype",
    [
        pl.UInt8,
        pl.Float32,
        pl.Int32,
        pl.Boolean,
        pl.List(pl.String),
        pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]),
    ],
)
def test_nested_list_types_preserved(dtype: pl.DataType) -> None:
    """Check the inner dtype survives when nesting empty typed series."""
    srs = pl.Series([pl.Series([], dtype=dtype) for _ in range(5)])
    for srs_nested in srs:
        assert srs_nested.dtype == dtype
1686

1687

1688
def test_to_physical() -> None:
    """Check the dtype returned by to_physical() for several logical dtypes."""
    # casting an int results in an int
    s = pl.Series("a", [1, 2, 3])
    assert_series_equal(s.to_physical(), s)

    # casting a date results in an Int32
    s = pl.Series("a", [date(2020, 1, 1)] * 3)
    expected = pl.Series("a", [18262] * 3, dtype=Int32)
    assert_series_equal(s.to_physical(), expected)

    # casting a categorical results in a UInt32
    s = pl.Series(["cat1"]).cast(pl.Categorical)
    assert s.to_physical().dtype == pl.UInt32

    # casting a small enum results in a UInt8
    s = pl.Series(["cat1"]).cast(pl.Enum(["cat1"]))
    assert s.to_physical().dtype == pl.UInt8

    # casting a List(Categorical) results in a List(UInt32)
    s = pl.Series([["cat1"]]).cast(pl.List(pl.Categorical))
    assert s.to_physical().dtype == pl.List(pl.UInt32)

    # casting a List(Enum) with a small enum results in a List(UInt8)
    s = pl.Series(["cat1"]).cast(pl.List(pl.Enum(["cat1"])))
    assert s.to_physical().dtype == pl.List(pl.UInt8)
1713

1714

1715
def test_to_physical_rechunked_21285() -> None:
    """Run `to_physical` on multi-chunk Array/List series of structs."""
    # A series with multiple chunks, dtype is array or list of structs with a
    # null field (causes rechunking) and a field with a different physical and
    # logical repr (causes the full body of `to_physical_repr` to run).
    logical_struct = pl.Struct({"f0": pl.Time, "f1": pl.Null})
    physical_struct = pl.Struct({"f0": Int64, "f1": pl.Null})

    # (logical dtype, dtype expected after `to_physical`)
    dtype_pairs = [
        (pl.Array(logical_struct, shape=(1,)), pl.Array(physical_struct, shape=(1,))),
        (pl.List(logical_struct), pl.List(physical_struct)),
    ]
    for logical, physical in dtype_pairs:
        single = pl.Series("a", [None], logical)  # content doesn't matter
        doubled = single.append(single)
        expected = pl.Series("a", [None, None], physical)
        assert_series_equal(doubled.to_physical(), expected)
1732

1733

1734
def test_is_between_datetime() -> None:
    """Check `is_between` with datetime bounds through the expression API."""
    lower = datetime(2020, 1, 1, 12, 0, 0)
    upper = datetime(2020, 1, 1, 23, 0, 0)
    s = pl.Series("a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)])

    # only on the expression api
    in_range = pl.col("*").is_between(lower, upper)
    result = s.to_frame().with_columns(in_range).to_series()
    assert_series_equal(result, pl.Series("a", [False, True]))
1743

1744

1745
@pytest.mark.parametrize(
    "f",
    [
        "sin",
        "cos",
        "tan",
        "arcsin",
        "arccos",
        "arctan",
        "sinh",
        "cosh",
        "tanh",
        "arcsinh",
        "arccosh",
        "arctanh",
    ],
)
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
def test_trigonometric(f: str) -> None:
    """Compare a Series trigonometric method with numpy's function of that name."""
    s = pl.Series("a", [0.0, math.pi, None, math.nan])

    numpy_func = getattr(np, f)
    from_numpy = pl.Series("a", numpy_func(s.to_numpy()))
    # put nulls back at the positions where the input series is null
    restore_nulls = pl.when(s.is_null()).then(None).otherwise(pl.col("a")).alias("a")
    expected = from_numpy.to_frame().with_columns(restore_nulls).to_series()

    series_method = getattr(s, f)
    assert_series_equal(series_method(), expected)
1773

1774

1775
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
def test_trigonometric_cot() -> None:
    """Check `cot` against hard-coded values, including null and NaN inputs."""
    # cotangent is not available in numpy...
    inputs = [0.0, math.pi, None, math.nan]
    outputs = [math.inf, -8.1656e15, None, math.nan]
    assert_series_equal(pl.Series("a", inputs).cot(), pl.Series("a", outputs))
1781

1782

1783
def test_trigonometric_invalid_input() -> None:
    """Trigonometric methods raise on String and Date series."""
    # String
    strings = pl.Series("a", ["1", "2", "3"])
    with pytest.raises(InvalidOperationError):
        strings.sin()

    # Date
    dates = pl.Series("a", [date(1990, 2, 28), date(2022, 7, 26)])
    with pytest.raises(InvalidOperationError):
        dates.cosh()
1793

1794

1795
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_product_ints(dtype: PolarsDataType) -> None:
    """Check `product` on integer series, with and without nulls."""
    # (values, expected product)
    cases = [
        ([1, 2, 3], 6),
        ([1, 2, None], 2),
        ([None, 2, 3], 6),
    ]
    for values, expected in cases:
        assert pl.Series("a", values, dtype=dtype).product() == expected
1806

1807

1808
@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
def test_product_floats(dtype: PolarsDataType) -> None:
    """Check `product` on empty, all-null and NaN-containing float series."""
    # empty and all-null inputs are both expected to give 1
    for values in ([], [None, None]):
        assert pl.Series("a", values, dtype=dtype).product() == 1

    with_nan = pl.Series("a", [3.0, None, float("nan")], dtype=dtype)
    assert math.isnan(with_nan.product())
1819

1820

1821
def test_ceil() -> None:
    """Check `ceil` on a small float series."""
    rounded_up = pl.Series([1.8, 1.2, 3.0]).ceil()
    assert_series_equal(rounded_up, pl.Series([2.0, 2.0, 3.0]))
1825

1826

1827
def test_duration_arithmetic() -> None:
    """Add a duration to a datetime series from either side."""
    # apply some basic duration math to series
    base = pl.Series(
        [datetime(2022, 1, 1, 10, 20, 30), datetime(2022, 1, 2, 20, 40, 50)]
    )
    # the same offset, once as a polars expression and once as a timedelta
    offsets = (
        pl.duration(days=5, microseconds=123456),
        timedelta(days=5, microseconds=123456),
    )
    shifted = [
        datetime(2022, 1, 6, 10, 20, 30, 123456),
        datetime(2022, 1, 7, 20, 40, 50, 123456),
    ]

    for offset in offsets:
        series_first = pl.select((base + offset).alias("d_offset"))["d_offset"]
        offset_first = pl.select((offset + base).alias("d_offset"))["d_offset"]
        assert series_first.to_list() == shifted
        assert_series_equal(series_first, offset_first)
1842

1843

1844
def test_mean_overflow() -> None:
    """Check that the mean of an int16 series is exact when its sum overflows int16.

    2**17 copies of 255 add up to far more than the int16 maximum, so a mean
    that accumulated in the input dtype would be wrong.
    """
    arr = np.array([255] * (1 << 17), dtype="int16")
    # numpy reference value
    assert arr.mean() == 255.0
    # the polars mean of the same data must agree
    assert pl.Series(arr).mean() == 255.0
1847

1848

1849
def test_sign() -> None:
    """Check `sign` on integer and float series and its error on dates."""
    nan = float("nan")

    # Integers
    ints = pl.Series("a", [-9, -0, 0, 4, None])
    assert_series_equal(ints.sign(), pl.Series("a", [-1, 0, 0, 1, None]))

    # Floats
    floats = pl.Series("a", [-9.0, -0.0, 0.0, 4.0, nan, None])
    float_signs = pl.Series("a", [-1.0, 0.0, 0.0, 1.0, nan, None])
    assert_series_equal(floats.sign(), float_signs)

    # Invalid input
    dates = pl.Series(
        "a", [date(1950, 2, 1), date(1970, 1, 1), date(2022, 12, 12), None]
    )
    with pytest.raises(InvalidOperationError):
        dates.sign()
1864

1865

1866
def test_exp() -> None:
    """Check `exp` on a float series with a null, and on an empty slice."""
    s = pl.Series("a", [0.1, 0.01, None])
    exponentials = [1.1051709180756477, 1.010050167084168, None]
    assert_series_equal(s.exp(), pl.Series("a", exponentials))

    # test if we can run on empty series as well.
    empty = s[:0]
    assert empty.exp().to_list() == []
1872

1873

1874
def test_cumulative_eval() -> None:
    """Check `cumulative_eval` with single and combined element expressions."""
    s = pl.Series("values", [1, 2, 3, 4, 5])

    first = pl.element().first()
    last_squared = pl.element().last() ** 2

    # (expression, expected values)
    checks = [
        # evaluate expressions individually
        (first, [1, 1, 1, 1, 1]),
        (last_squared, [1, 4, 9, 16, 25]),
        # evaluate combined expressions and validate
        (first - last_squared, [0, -3, -8, -15, -24]),
    ]
    for expr, values in checks:
        assert_series_equal(s.cumulative_eval(expr), pl.Series("values", values))
1890

1891

1892
def test_clip() -> None:
    """Check `clip` with both bounds on a series containing a null."""
    clipped = pl.Series("foo", [-50, 5, None, 50]).clip(1, 10)
    assert clipped.to_list() == [1, 5, None, 10]
1895

1896

1897
def test_repr() -> None:
    """Check the repr of a Series and of a Series subclass."""

    class XSeries(pl.Series):
        """Custom Series class."""

    # (class to instantiate, header line expected in its repr);
    # the second entry checks the custom class name is reflected in the output
    cases = [
        (pl.Series, "Series: 'ints' [i64]"),
        (XSeries, "XSeries: 'ints' [i64]"),
    ]
    for series_cls, header in cases:
        instance = series_cls("ints", [1001, 2002, 3003])
        text = repr(instance)

        assert "shape: (3,)" in text
        assert header in text
        # every value must show up in the rendered output
        for n in instance.to_list():
            assert str(n) in text
1918

1919

1920
def test_repr_html(df: pl.DataFrame) -> None:
    """Render a Series as HTML and look for a table tag."""
    # NOTE(review): the `df` argument is not used in this test body.
    # check it does not panic/error, and appears to contain a table
    s = pl.Series("misc", [123, 456, 789])
    assert "<table" in s._repr_html_()
1924

1925

1926
@pytest.mark.parametrize(
    ("value", "time_unit", "exp", "exp_type"),
    [
        (13285, "d", date(2006, 5, 17), pl.Date),
        (1147880044, "s", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime),
        (1147880044 * 1_000, "ms", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime("ms")),
        (
            1147880044 * 1_000_000,
            "us",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("us"),
        ),
        (
            1147880044 * 1_000_000_000,
            "ns",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("ns"),
        ),
    ],
)
def test_from_epoch_expr(
    value: int,
    time_unit: EpochTimeUnit,
    exp: date | datetime,
    exp_type: PolarsDataType,
) -> None:
    """Convert an epoch value (plus a null) with `from_epoch` for each time unit."""
    epochs = pl.Series("timestamp", [value, None])
    expected = pl.Series("timestamp", [exp, None]).cast(exp_type)
    assert_series_equal(pl.from_epoch(epochs, time_unit=time_unit), expected)
1957

1958

1959
def test_get_chunks() -> None:
    """Chunks of a non-rechunked concat equal the concatenated inputs."""
    first, second = pl.Series("a", [1, 2]), pl.Series("a", [3, 4])
    chunks = pl.concat([first, second], rechunk=False).get_chunks()
    for idx, part in enumerate((first, second)):
        assert_series_equal(chunks[idx], part)
1965

1966

1967
def test_null_comparisons() -> None:
    """Both `==` and `!=` against a shifted copy give two nulls."""
    s = pl.Series("s", [None, "str", "a"])
    shifted = s.shift()
    for compared in (shifted == s, shifted != s):
        assert compared.null_count() == 2
1971

1972

1973
def test_min_max_agg_on_str() -> None:
    """Check `min` and `max` on a string series."""
    s = pl.Series(["b", "a", "x"])
    assert s.min() == "a"
    assert s.max() == "x"
1977

1978

1979
def test_min_max_full_nan_15058() -> None:
    """`min` and `max` of an all-NaN series are both NaN."""
    s = pl.Series([float("nan")] * 2)
    # NaN is recognised by comparing unequal to itself
    for extreme in (s.min(), s.max()):
        assert extreme != extreme
1982

1983

1984
def test_is_between() -> None:
    """Check `is_between` with each tested `closed` mode, expression bounds and strings."""
    nums = pl.Series("num", [1, 2, None, 4, 5])

    # default `closed`
    assert nums.is_between(2, 4).to_list() == [False, True, None, True, False]

    left_closed = nums.is_between(2, 4, closed="left")
    assert left_closed.to_list() == [False, True, None, False, False]

    right_closed = nums.is_between(2, 4, closed="right")
    assert right_closed.to_list() == [False, False, None, True, False]

    # the bounds may be expressions as well
    expr_bounds = nums.is_between(pl.lit(2) / 2, pl.lit(4) * 2, closed="both")
    assert expr_bounds.to_list() == [True, True, None, True, True]

    letters = pl.Series("s", ["a", "b", "c", "d", "e"])
    assert letters.is_between("b", "d").to_list() == [False, True, True, True, False]
2023

2024

2025
@pytest.mark.parametrize(
    ("dtype", "lower", "upper"),
    [
        (pl.Int8, -128, 127),
        (pl.UInt8, 0, 255),
        (pl.Int16, -32768, 32767),
        (pl.UInt16, 0, 65535),
        (pl.Int32, -2147483648, 2147483647),
        (pl.UInt32, 0, 4294967295),
        (pl.Int64, -9223372036854775808, 9223372036854775807),
        (pl.UInt64, 0, 18446744073709551615),
        (pl.Float32, float("-inf"), float("inf")),
        (pl.Float64, float("-inf"), float("inf")),
    ],
)
def test_upper_lower_bounds(
    dtype: PolarsDataType, upper: int | float, lower: int | float
) -> None:
    """Check `lower_bound` and `upper_bound` for each numeric dtype."""
    # pytest passes parametrized arguments by name, so the signature listing
    # `upper` before `lower` does not swap the values
    empty = pl.Series("s", dtype=dtype)
    bounds = (empty.lower_bound().item(), empty.upper_bound().item())
    assert bounds == (lower, upper)
2046

2047

2048
def test_numpy_series_arithmetic() -> None:
    """Check arithmetic between a Series and a numpy array, in both operand orders."""
    sx = pl.Series(values=[1, 2])
    y = np.array([3.0, 4.0])

    def f64(values: list[float]) -> pl.Series:
        # every expected result in this test is a Float64 series
        return pl.Series(values, dtype=pl.Float64)

    # addition
    total = f64([4.0, 6.0])
    assert_series_equal(y + sx, total)  # type: ignore[arg-type]
    assert_series_equal(sx + y, total)

    # subtraction
    diff_left = cast("pl.Series", y - sx)  # py37 is different vs py311 on this one
    assert_series_equal(diff_left, f64([2.0, 2.0]))
    assert_series_equal(sx - y, f64([-2.0, -2.0]))

    # multiplication
    product = f64([3.0, 8.0])
    assert_series_equal(y * sx, product)  # type: ignore[arg-type]
    assert_series_equal(sx * y, product)

    # division
    assert_series_equal(y / sx, f64([3.0, 2.0]))  # type: ignore[arg-type]
    assert_series_equal(sx / y, f64([1 / 3, 0.5]))

    # exponentiation
    assert_series_equal(y**sx, f64([3.0, 16.0]))  # type: ignore[arg-type]
    assert_series_equal(sx**y, f64([1.0, 16.0]))  # type: ignore[arg-type]
2084

2085

2086
def test_from_epoch_seq_input() -> None:
    """`from_epoch` accepts a plain list of epoch values."""
    result = pl.from_epoch([1147880044])
    assert_series_equal(result, pl.Series([datetime(2006, 5, 17, 15, 34, 4)]))
2091

2092

2093
def test_symmetry_for_max_in_names() -> None:
    """Subtracting `max()` on either side keeps the series name."""
    candidates = [
        pl.Series("a", [1]),  # int
        pl.Series("a", [1.0]),  # float
        pl.Series("a", [1], dtype=pl.Duration("ns")),  # duration
        pl.Series("a", [1], dtype=pl.Datetime("ns")),  # datetime
    ]
    for a in candidates:
        assert (a - a.max()).name == (a.max() - a).name == a.name  # type: ignore[union-attr]

    # TODO: time arithmetic support?
    # a = pl.Series("a", [1], dtype=pl.Time)
    # assert (a - a.max()).name == (a.max() - a).name == a.name
2110

2111

2112
def test_series_getitem_out_of_bounds_positive() -> None:
    """Indexing past the end raises IndexError with a descriptive message."""
    s = pl.Series([1, 2])
    message = "index 10 is out of bounds for sequence of length 2"
    with pytest.raises(IndexError, match=message):
        s[10]
2118

2119

2120
def test_series_getitem_out_of_bounds_negative() -> None:
    """Indexing before the start raises IndexError with a descriptive message."""
    s = pl.Series([1, 2])
    message = "index -10 is out of bounds for sequence of length 2"
    with pytest.raises(IndexError, match=message):
        s[-10]
2126

2127

2128
def test_series_cmp_fast_paths() -> None:
    """Comparing a one-element null series with a two-element series gives nulls."""
    all_null = [None, None]

    # (dtype, right-hand values for `!=`, right-hand values for `==`)
    cases = [
        (pl.Int32, [1, 2], [1, 2]),
        (pl.String, ["a", "b"], ["a", "b"]),
        (pl.Boolean, [True, False], [False, False]),
    ]
    for dtype, ne_values, eq_values in cases:
        lhs = pl.Series([None], dtype=dtype)
        assert (lhs != pl.Series(ne_values, dtype=dtype)).to_list() == all_null
        assert (lhs == pl.Series(eq_values, dtype=dtype)).to_list() == all_null
2151

2152

2153
def test_comp_series_with_str_13123() -> None:
    """Compare a string series containing a null against a string scalar."""
    s = pl.Series(["1", "2", None])

    # (comparison result, expected values)
    outcomes = [
        (s != "1", [False, True, None]),
        (s == "1", [True, False, None]),
        (s.eq_missing("1"), [True, False, False]),
        (s.ne_missing("1"), [False, True, True]),
    ]
    for result, expected in outcomes:
        assert_series_equal(result, pl.Series(expected))
2159

2160

2161
@pytest.mark.parametrize(
    ("data", "single", "multiple", "single_expected", "multiple_expected"),
    [
        ([1, 2, 3], 1, [2, 4], 0, [1, 3]),
        (["a", "b", "c"], "d", ["a", "d"], 3, [0, 3]),
        ([b"a", b"b", b"c"], b"d", [b"a", b"d"], 3, [0, 3]),
        (
            [date(2022, 1, 2), date(2023, 4, 1)],
            date(2022, 1, 1),
            [date(1999, 10, 1), date(2024, 1, 1)],
            0,
            [0, 2],
        ),
        ([1, 2, 3], 1, np.array([2, 4]), 0, [1, 3]),  # test np array.
    ],
)
def test_search_sorted(
    data: list[Any],
    single: Any,
    multiple: list[Any],
    single_expected: Any,
    multiple_expected: list[Any],
) -> None:
    """Check `search_sorted` with one value and with several values."""
    s = pl.Series(data)

    # one search value: compared directly with the expected index
    assert s.search_sorted(single) == single_expected

    # several search values: compared as a UInt32 series of indices
    expected_indices = pl.Series(multiple_expected, dtype=pl.UInt32)
    assert_series_equal(s.search_sorted(multiple), expected_indices)
2190

2191

2192
def test_series_from_pandas_with_dtype() -> None:
    """Build a Series from pandas input with an explicit target dtype."""
    expected = pl.Series("foo", [1, 2, 3], dtype=pl.Int8)
    fitting_inputs = (pd.Series([1, 2, 3]), pd.Series([1, 2, 3], dtype="Int16"))
    for source in fitting_inputs:
        assert_series_equal(pl.Series("foo", source, pl.Int8), expected)

    # -1 cannot be represented as UInt8
    negative_inputs = (pd.Series([-1, 2, 3]), pd.Series([-1, 2, 3], dtype="Int8"))
    for source in negative_inputs:
        with pytest.raises(InvalidOperationError, match="conversion from"):
            pl.Series("foo", source, pl.UInt8)

        # with strict=False the unrepresentable value becomes null
        lenient = pl.Series("foo", source, pl.UInt8, strict=False)
        assert lenient.to_list() == [None, 2, 3]
        assert lenient.dtype == pl.UInt8
2210

2211

2212
def test_series_from_pyarrow_with_dtype() -> None:
    """Build a Series from a pyarrow array with an explicit target dtype."""
    as_int8 = pl.Series("foo", pa.array([-1, 2, 3]), pl.Int8)
    assert_series_equal(as_int8, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))

    # -1 cannot be represented as UInt8
    with pytest.raises(InvalidOperationError, match="conversion from"):
        pl.Series("foo", pa.array([-1, 2, 3]), pl.UInt8)

    # with strict=False the unrepresentable value becomes null
    lenient = pl.Series("foo", pa.array([-1, 2, 3]), dtype=pl.UInt8, strict=False)
    assert lenient.to_list() == [None, 2, 3]
    assert lenient.dtype == pl.UInt8
2222

2223

2224
def test_series_from_numpy_with_dtype() -> None:
    """Build a Series from a numpy array with an explicit target dtype."""
    as_int8 = pl.Series("foo", np.array([-1, 2, 3]), pl.Int8)
    assert_series_equal(as_int8, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))

    # -1 cannot be represented as UInt8
    with pytest.raises(InvalidOperationError, match="conversion from"):
        pl.Series("foo", np.array([-1, 2, 3]), pl.UInt8)

    # with strict=False the unrepresentable value becomes null
    lenient = pl.Series("foo", np.array([-1, 2, 3]), dtype=pl.UInt8, strict=False)
    assert lenient.to_list() == [None, 2, 3]
    assert lenient.dtype == pl.UInt8
2234

2235

2236
def test_raise_invalid_is_between() -> None:
    """`is_between` on an integer literal with string bounds raises."""
    lower, upper = pl.lit("11"), pl.lit("33")
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.select(pl.lit(2).is_between(lower, upper))
2239

2240

2241
def test_construction_large_nested_u64_17231() -> None:
    """Round-trip a struct holding a list with a UInt64 value above the Int64 range.

    9223372036854775808 is 2**63, one more than the largest Int64 value.
    """
    # `pl` comes from the module-level import; no local re-import is needed
    values = [{"f0": [9223372036854775808]}]
    dtype = pl.Struct({"f0": pl.List(pl.UInt64)})
    assert pl.Series(values, dtype=dtype).to_list() == values
2247

2248

2249
def test_repeat_by() -> None:
    """`repeat_by(2)` turns each value into a two-element list."""
    repeated = pl.select(a=pl.Series("a", [1, 2]).repeat_by(2))
    as_lists = pl.select(a=pl.Series("a", [[1, 1], [2, 2]]))
    assert repeated.equals(as_lists)
2253

2254

2255
def test_is_close() -> None:
    """Check `is_close` with `abs_tol=0.5` on finite, infinite and NaN pairs."""
    inf, nan = float("inf"), float("nan")

    # one row per comparison: (value in a, value in b, expected result)
    rows = [
        (1.0, 1.3, True),
        (1.0, 1.7, False),
        (-inf, -inf, True),
        (inf, inf, True),
        (inf, -inf, False),
        (inf, 1.0, False),
        (nan, nan, False),
    ]
    a_values, b_values, expected = (list(column) for column in zip(*rows))

    a = pl.Series("a", a_values)
    b = pl.Series("b", b_values)
    assert a.is_close(b, abs_tol=0.5).to_list() == expected
2280

2281

2282
def test_is_close_literal() -> None:
    """Check `is_close` against a Python float with the default tolerances."""
    values = [1.1, 1.2, 1.3, 1.4, float("inf"), float("nan")]
    close_to_target = pl.Series("a", values).is_close(1.2)
    assert close_to_target.to_list() == [False, True, False, False, False, False]
2285

2286

2287
def test_is_close_nans_equal() -> None:
    """With `nans_equal=True`, a NaN pair counts as close."""
    nan = float("nan")
    a = pl.Series("a", [1.0, nan])
    b = pl.Series("b", [2.0, nan])
    result = a.is_close(b, nans_equal=True)
    assert result.to_list() == [False, True]
2291

2292

2293
def test_is_close_invalid_abs_tol() -> None:
    """A negative `abs_tol` raises ComputeError."""
    one = pl.lit(1.0)
    with pytest.raises(pl.exceptions.ComputeError):
        pl.select(one.is_close(1, abs_tol=-1.0))
2296

2297

2298
def test_is_close_invalid_rel_tol() -> None:
    """A negative `rel_tol` raises ComputeError."""
    one = pl.lit(1.0)
    with pytest.raises(pl.exceptions.ComputeError):
        pl.select(one.is_close(1, rel_tol=-1.0))
2301

2302

2303
def test_comparisons_structs_raise() -> None:
    """`==` between a struct series and several right-hand values raises."""
    s = pl.Series([{"x": 1}, {"x": 2}, {"x": 3}])
    pattern = r"Series of type Struct\(\{'x': Int64\}\) does not have eq operator"
    operands = ["", " ", 5, {"x": 1}]
    for rhs in operands:
        with pytest.raises(NotImplementedError, match=pattern):
            s == rhs  # noqa: B015
2312

2313
Product

Resources

Company