Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/series/test_series.py
6939 views
1
from __future__ import annotations
2
3
import math
4
from datetime import date, datetime, time, timedelta
5
from typing import TYPE_CHECKING, Any, cast
6
from zoneinfo import ZoneInfo
7
8
import numpy as np
9
import pandas as pd
10
import pyarrow as pa
11
import pytest
12
13
import polars as pl
14
from polars._utils.construction import iterable_to_pyseries
15
from polars.datatypes import (
16
Datetime,
17
Field,
18
Float64,
19
Int32,
20
Int64,
21
Time,
22
UInt32,
23
UInt64,
24
Unknown,
25
)
26
from polars.exceptions import (
27
DuplicateError,
28
InvalidOperationError,
29
PolarsInefficientMapWarning,
30
ShapeError,
31
)
32
from polars.testing import assert_frame_equal, assert_series_equal
33
from tests.unit.conftest import FLOAT_DTYPES, INTEGER_DTYPES
34
from tests.unit.utils.pycapsule_utils import PyCapsuleStreamHolder
35
36
if TYPE_CHECKING:
37
from collections.abc import Iterator
38
39
from polars._typing import EpochTimeUnit, PolarsDataType, TimeUnit
40
41
42
def test_cum_agg() -> None:
    """Check every cumulative aggregation against hand-computed values."""
    values = pl.Series("a", [1, 2, 3, 2])
    expected_by_method = {
        "cum_sum": [1, 3, 6, 8],
        "cum_min": [1, 1, 1, 1],
        "cum_max": [1, 2, 3, 3],
        "cum_prod": [1, 2, 6, 12],
    }
    for method_name, expected in expected_by_method.items():
        result = getattr(values, method_name)()
        assert_series_equal(result, pl.Series("a", expected))
49
50
51
def test_cum_agg_with_nulls() -> None:
    """Check cumulative aggregations on input that contains nulls."""
    values = pl.Series("a", [None, 2, None, 7, 8, None])
    expected_by_method = {
        "cum_sum": [None, 2, None, 9, 17, None],
        "cum_min": [None, 2, None, 2, 2, None],
        "cum_max": [None, 2, None, 7, 8, None],
        "cum_prod": [None, 2, None, 14, 112, None],
    }
    # Every expected list keeps a null wherever the input has one.
    for method_name, expected in expected_by_method.items():
        result = getattr(values, method_name)()
        assert_series_equal(result, pl.Series("a", expected))
58
59
60
def test_cum_agg_with_infs() -> None:
    """Check cum_min/cum_max on series that start with an infinite value."""
    pos_inf = float("inf")
    neg_inf = float("-inf")

    running_min = pl.Series([pos_inf, 0.0, 1.0]).cum_min()
    assert_series_equal(running_min, pl.Series([pos_inf, 0.0, 0.0]))

    running_max = pl.Series([neg_inf, 0.0, 1.0]).cum_max()
    assert_series_equal(running_max, pl.Series([neg_inf, 0.0, 1.0]))
67
68
69
def test_cum_min_max_bool() -> None:
    """Boolean cum_min/cum_max must agree with the same ops on Int32 casts."""
    flags = pl.Series(
        "a", [None, True, True, None, False, None, True, False, False, None]
    )

    # Forward direction.
    bool_min = flags.cum_min().cast(pl.Int32)
    assert_series_equal(bool_min, flags.cast(pl.Int32).cum_min())
    bool_max = flags.cum_max().cast(pl.Int32)
    assert_series_equal(bool_max, flags.cast(pl.Int32).cum_max())

    # Reverse direction.
    bool_min_rev = flags.cum_min(reverse=True).cast(pl.Int32)
    assert_series_equal(bool_min_rev, flags.cast(pl.Int32).cum_min(reverse=True))
    bool_max_rev = flags.cum_max(reverse=True).cast(pl.Int32)
    assert_series_equal(bool_max_rev, flags.cast(pl.Int32).cum_max(reverse=True))
79
80
81
def test_init_inputs(monkeypatch: Any) -> None:
    """Exercise `pl.Series` construction with accepted and rejected inputs."""
    nan = float("nan")
    # Good inputs: positional and keyword spellings of name/values
    pl.Series("a", [1, 2])
    pl.Series("a", values=[1, 2])
    pl.Series(name="a", values=[1, 2])
    pl.Series(values=[1, 2], name="a")

    assert pl.Series([1, 2]).dtype == pl.Int64
    assert pl.Series(values=[1, 2]).dtype == pl.Int64
    assert pl.Series("a").dtype == pl.Null  # Null dtype used in case of no data
    assert pl.Series().dtype == pl.Null
    assert pl.Series([]).dtype == pl.Null
    assert (
        pl.Series([None, None, None]).dtype == pl.Null
    )  # Null dtype is also used for a list with only None
    assert pl.Series(values=[True, False]).dtype == pl.Boolean
    assert pl.Series(values=np.array([True, False])).dtype == pl.Boolean
    assert pl.Series(values=np.array(["foo", "bar"])).dtype == pl.String
    assert pl.Series(values=["foo", "bar"]).dtype == pl.String
    assert pl.Series("a", [pl.Series([1, 2, 4]), pl.Series([3, 2, 1])]).dtype == pl.List
    assert pl.Series("a", [10000, 20000, 30000], dtype=pl.Time).dtype == pl.Time

    # 2d numpy array and/or list of 1d numpy arrays
    # (all three spellings are expected to give the same fixed-width Array)
    for res in (
        pl.Series(
            name="a",
            values=np.array([[1, 2], [3, nan]], dtype=np.float32),
            nan_to_null=True,
        ),
        pl.Series(
            name="a",
            values=[
                np.array([1, 2], dtype=np.float32),
                np.array([3, nan], dtype=np.float32),
            ],
            nan_to_null=True,
        ),
        pl.Series(
            name="a",
            values=(
                np.ndarray((2,), np.float32, np.array([1, 2], dtype=np.float32)),
                np.ndarray((2,), np.float32, np.array([3, nan], dtype=np.float32)),
            ),
            nan_to_null=True,
        ),
    ):
        assert res.dtype == pl.Array(pl.Float32, shape=2)
        assert res[0].to_list() == [1.0, 2.0]
        # nan_to_null=True: the NaN is expected to come back as None
        assert res[1].to_list() == [3.0, None]

    # numpy from arange, with/without dtype
    # (arrays of different lengths, so the result is a List rather than an Array)
    two_ints = np.arange(2, dtype=np.int64)
    three_ints = np.arange(3, dtype=np.int64)
    for res in (
        pl.Series("a", [two_ints, three_ints]),
        pl.Series("a", [two_ints, three_ints], dtype=pl.List(pl.Int64)),
    ):
        assert res.dtype == pl.List(pl.Int64)
        assert res.to_list() == [[0, 1], [0, 1, 2]]

    assert pl.Series(
        values=np.array([["foo", "bar"], ["foo2", "bar2"]])
    ).dtype == pl.Array(pl.String, shape=2)

    # lists
    assert pl.Series("a", [[1, 2], [3, 4]]).dtype == pl.List(pl.Int64)

    # conversion of Date to Datetime
    s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime)
    assert s.to_list() == [datetime(2023, 1, 1), datetime(2023, 1, 2)]
    assert Datetime == s.dtype
    assert s.dtype.time_unit == "us"  # type: ignore[attr-defined]
    assert s.dtype.time_zone is None  # type: ignore[attr-defined]

    # conversion of Date to Datetime with specified timezone and units
    tu: TimeUnit = "ms"
    tz = "America/Argentina/Rio_Gallegos"
    s = pl.Series(
        [date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu)
    ).dt.replace_time_zone(tz)
    d1 = datetime(2023, 1, 1, 0, 0, 0, 0, ZoneInfo(tz))
    d2 = datetime(2023, 1, 2, 0, 0, 0, 0, ZoneInfo(tz))
    assert s.to_list() == [d1, d2]
    assert Datetime == s.dtype
    assert s.dtype.time_unit == tu  # type: ignore[attr-defined]
    assert s.dtype.time_zone == tz  # type: ignore[attr-defined]

    # datetime64: check timeunit (auto-detect, implicit/explicit) and NaT
    d64 = pd.date_range(date(2021, 8, 1), date(2021, 8, 3)).values
    d64[1] = None  # middle element is expected to round-trip as None

    expected = [datetime(2021, 8, 1, 0), None, datetime(2021, 8, 3, 0)]
    for dtype in (None, Datetime, Datetime("ns")):
        s = pl.Series("dates", d64, dtype)
        assert s.to_list() == expected
        assert Datetime == s.dtype
        assert s.dtype.time_unit == "ns"  # type: ignore[attr-defined]

    # a millisecond-resolution numpy array should keep its "ms" unit
    s = pl.Series(values=d64.astype("<M8[ms]"))
    assert s.dtype.time_unit == "ms"  # type: ignore[attr-defined]
    assert expected == s.to_list()

    # pandas
    assert pl.Series(pd.Series([1, 2])).dtype == pl.Int64

    # Bad inputs
    with pytest.raises(TypeError):
        pl.Series([1, 2, 3], [1, 2, 3])
    with pytest.raises(TypeError):
        pl.Series({"a": [1, 2, 3]})
    with pytest.raises(OverflowError):
        pl.Series("bigint", [2**128])

    # numpy not available: make the Series module's numpy check always return
    # False. This must stay last, as the patch lasts for the rest of the test.
    monkeypatch.setattr(pl.series.series, "_check_for_numpy", lambda x: False)
    with pytest.raises(TypeError):
        pl.DataFrame(np.array([1, 2, 3]), schema=["a"])
199
200
201
def test_init_structured_objects() -> None:
    """Build struct Series from dataclass, namedtuple and pydantic objects."""
    from typing import NamedTuple

    from polars.dependencies import dataclasses, pydantic

    @dataclasses.dataclass
    class TeaShipmentDC:
        exporter: str
        importer: str
        product: str
        tonnes: int | None

    class TeaShipmentNT(NamedTuple):
        exporter: str
        importer: str
        product: str
        tonnes: None | int

    class TeaShipmentPD(pydantic.BaseModel):
        exporter: str
        importer: str
        product: str
        tonnes: int

    # One record per shipment; these double as the expected `to_list` output.
    records = [
        {
            "exporter": "Sri Lanka",
            "importer": "USA",
            "product": "Ceylon",
            "tonnes": 10,
        },
        {
            "exporter": "India",
            "importer": "UK",
            "product": "Darjeeling",
            "tonnes": 25,
        },
        {
            "exporter": "China",
            "importer": "UK",
            "product": "Keemum",
            "tonnes": 40,
        },
    ]
    expected_fields = [
        Field("exporter", pl.String),
        Field("importer", pl.String),
        Field("product", pl.String),
        Field("tonnes", pl.Int64),
    ]

    for shipment_cls in (TeaShipmentDC, TeaShipmentNT, TeaShipmentPD):
        shipments = [shipment_cls(**record) for record in records]
        series = pl.Series("t", shipments)

        assert isinstance(series, pl.Series)
        assert series.dtype.fields == expected_fields  # type: ignore[attr-defined]
        assert series.to_list() == records
        assert_frame_equal(series.to_frame(), pl.DataFrame({"t": shipments}))
261
262
263
def test_to_frame() -> None:
    """Check `to_frame` column naming with and without an explicit name."""
    unnamed = pl.Series([1, 2])
    named = pl.Series("s", [1, 2])

    frames_and_names = (
        (unnamed.to_frame(), ""),
        (named.to_frame(), "s"),
        (unnamed.to_frame("xyz"), "xyz"),
        (named.to_frame("xyz"), "xyz"),
    )
    for frame, column_name in frames_and_names:
        assert isinstance(frame, pl.DataFrame)
        assert frame.rows() == [(1,), (2,)]
        assert frame.columns == [column_name]

    # note: the empty string IS technically a valid column name
    assert named.to_frame("").columns == [""]
    # ... and the Series still has its original name afterwards
    assert named.name == "s"
280
281
282
def test_bitwise_ops() -> None:
    """Check `&`, `|`, `^` and `~` on boolean Series, plus reflected forms."""
    lhs = pl.Series([True, False, True])
    rhs = pl.Series([False, True, True])

    both = lhs & rhs
    assert_series_equal(both, pl.Series([False, False, True]))
    either = lhs | rhs
    assert_series_equal(either, pl.Series([True, True, True]))
    exactly_one = lhs ^ rhs
    assert_series_equal(exactly_one, pl.Series([True, True, False]))
    inverted = ~lhs
    assert_series_equal(inverted, pl.Series([False, True, False]))

    # rand/rxor/ror are triggered here by putting a plain Python bool on the
    # left-hand side. Note that the type annotations only allow Series to be
    # passed in, but there is specific code to deal with non-Series inputs.
    reflected_and = True & lhs
    assert_series_equal(reflected_and, pl.Series([True, False, True]))
    reflected_or = True | lhs
    assert_series_equal(reflected_or, pl.Series([True, True, True]))
    reflected_xor = True ^ lhs
    assert_series_equal(reflected_xor, pl.Series([False, True, False]))
305
306
307
def test_bitwise_floats_invert() -> None:
    """Inverting a float Series with `~` must raise InvalidOperationError."""
    floats = pl.Series([2.0, 3.0, 0.0])

    with pytest.raises(InvalidOperationError):
        ~floats
312
313
314
def test_equality() -> None:
    """Compare a Series with itself, with a string, and with a list."""
    ints = pl.Series("a", [1, 2])
    same = ints  # alias of the same object, not a copy

    is_equal = ints == same
    assert isinstance(is_equal, pl.Series)
    assert is_equal.sum() == 2

    # Number of True entries expected from each remaining comparison.
    mask_and_count = (
        (ints != same, 0),
        (ints >= same, 2),
        (ints <= same, 2),
        (ints > same, 0),
        (ints < same, 0),
    )
    for mask, true_count in mask_and_count:
        assert mask.sum() == true_count
    assert ints.sum() == 3
    assert_series_equal(ints, same)

    words = pl.Series("name", ["ham", "foo", "bar"])
    expected_mask = pl.Series("name", [True, False, False])
    assert_series_equal(words == "ham", expected_mask)

    lists = pl.Series("name", [[1], [1, 2], [2, 3]])
    expected_mask = pl.Series("name", [True, False, False])
    assert_series_equal(lists == [1], expected_mask)
334
335
336
def test_agg() -> None:
    """Check mean, min and max of a two-element integer Series."""
    numbers = pl.Series("a", [1, 2])
    average = numbers.mean()
    assert average == 1.5
    smallest = numbers.min()
    assert smallest == 1
    largest = numbers.max()
    assert largest == 2
341
342
343
def test_date_agg() -> None:
    """Check min and max on a Date Series, including a far-future date."""
    earliest = date(2022, 8, 2)
    latest = date(9009, 9, 9)
    dates = pl.Series([earliest, date(2096, 8, 1), latest], dtype=pl.Date)

    assert dates.min() == earliest
    assert dates.max() == latest
354
355
356
@pytest.mark.parametrize(
    ("s", "min", "max"),
    [
        (pl.Series(["c", "b", "a"], dtype=pl.Categorical("lexical")), "a", "c"),
        (pl.Series([None, "a", "c", "b"], dtype=pl.Categorical("lexical")), "a", "c"),
        (pl.Series([], dtype=pl.Categorical("lexical")), None, None),
        (pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a"])), "c", "a"),
        (pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a", "d"])), "c", "a"),
    ],
)
def test_categorical_agg(s: pl.Series, min: str | None, max: str | None) -> None:
    """Check min/max of Categorical and Enum Series against the parameters."""
    lowest = s.min()
    highest = s.max()
    assert lowest == min
    assert highest == max
369
370
371
def test_add_string() -> None:
    """Check `+` between a string Series and a plain `str`, on either side."""
    s = pl.Series(["hello", "weird"])

    # Series on the left: the result keeps the (empty) name of the Series.
    result = s + " world"
    assert_series_equal(result, pl.Series(["hello world", "weird world"]))

    # str on the left: the expected result is named "literal".
    result = "pfx:" + s
    assert_series_equal(result, pl.Series("literal", ["pfx:hello", "pfx:weird"]))
379
380
381
@pytest.mark.parametrize(
    ("data", "expected_dtype"),
    [
        (100, pl.Int64),
        (8.5, pl.Float64),
        ("서울특별시", pl.String),
        (date.today(), pl.Date),
        (datetime.now(), pl.Datetime("us")),
        (time(23, 59, 59), pl.Time),
        (timedelta(hours=7, seconds=123), pl.Duration("us")),
    ],
)
def test_unknown_dtype(data: Any, expected_dtype: PolarsDataType) -> None:
    """Passing `Unknown` as the dtype should still infer the right dtype."""
    inferred = pl.Series([data], dtype=Unknown)
    assert inferred.dtype == expected_dtype
    # The single value must round-trip unchanged.
    assert inferred.to_list() == [data]
398
399
400
def test_various() -> None:
    """Smoke-test assorted Series methods: rename, append, head/tail, sort."""
    pair = pl.Series("a", [1, 2])
    assert pair.is_null().sum() == 0
    assert pair.name == "a"

    renamed = pair.rename("b")
    assert renamed.name == "b"
    assert renamed.len() == 2
    assert len(renamed) == 2

    # `append` is called for its effect on `renamed`; the return is unused.
    renamed.append(renamed.clone())
    assert_series_equal(renamed, pl.Series("b", [1, 2, 1, 2]))

    twenty = pl.Series("a", range(20))
    first_five = twenty.head(5)
    last_five = twenty.tail(5)
    assert first_five.len() == 5
    assert last_five.len() == 5
    assert (twenty.head(5) != twenty.tail(5)).all()

    unsorted = pl.Series("a", [2, 1, 4])
    unsorted.sort(in_place=True)
    assert_series_equal(unsorted, pl.Series("a", [1, 2, 4]))

    with_dupes = pl.Series("a", [2, 1, 1, 4, 4, 4])
    expected_idx = pl.Series("a", [0, 1, 3], dtype=UInt32)
    assert_series_equal(with_dupes.arg_unique(), expected_idx)

    assert_series_equal(with_dupes.gather([2, 3]), pl.Series("a", [1, 4]))
425
426
427
def test_series_dtype_is() -> None:
    """Check the `dtype.is_*` predicates for a range of dtypes."""
    signed = pl.Series("s", [1, 2, 3])
    signed_dtype = signed.dtype

    assert signed_dtype.is_numeric()
    assert signed_dtype.is_integer()
    assert signed_dtype.is_signed_integer()
    assert not signed_dtype.is_unsigned_integer()
    # Multiplying by a float is expected to give a float dtype.
    assert (signed * 0.99).dtype.is_float()

    unsigned_dtype = pl.Series("s", [1, 2, 3], dtype=pl.UInt8).dtype
    assert unsigned_dtype.is_numeric()
    assert unsigned_dtype.is_integer()
    assert not unsigned_dtype.is_signed_integer()
    assert unsigned_dtype.is_unsigned_integer()

    bools = pl.Series("bool", [True, None, False])
    assert not bools.dtype.is_numeric()

    text = pl.Series("s", ["testing..."])
    assert text.dtype == pl.String
    assert text.dtype != pl.Boolean

    decimals = pl.Series("s", [], dtype=pl.Decimal(20, 15))
    assert not decimals.dtype.is_float()
    assert decimals.dtype.is_numeric()
    assert decimals.is_empty()

    timestamps = pl.Series("s", [], dtype=pl.Datetime("ms", time_zone="UTC"))
    assert timestamps.dtype.is_temporal()
456
457
458
def test_series_head_tail_limit() -> None:
    """Check head/limit/tail for normal, oversized and negative counts."""
    digits = pl.Series(range(10))

    assert_series_equal(digits.head(5), pl.Series(range(5)))
    assert_series_equal(digits.limit(5), digits.head(5))
    assert_series_equal(digits.tail(5), pl.Series(range(5, 10)))

    # check if it doesn't fail when out of bounds
    for take in (digits.head, digits.limit, digits.tail):
        assert take(100).len() == 10

    # negative values
    assert_series_equal(digits.head(-7), pl.Series(range(3)))
    assert digits.head(-2).len() == 8
    assert_series_equal(digits.tail(-8), pl.Series(range(8, 10)))
    assert digits.head(-6).len() == 4

    # negative values out of bounds
    for take in (digits.head, digits.limit, digits.tail):
        assert take(-12).len() == 0
480
481
482
def test_filter_ops() -> None:
    """Filter 0..19 with each comparison against 1 and count the survivors."""
    numbers = pl.Series("a", range(20))
    mask_and_length = (
        (numbers > 1, 18),
        (numbers < 1, 1),
        (numbers <= 1, 2),
        (numbers >= 1, 19),
        (numbers == 1, 1),
        (numbers != 1, 19),
    )
    for mask, expected_len in mask_and_length:
        assert numbers.filter(mask).len() == expected_len
490
491
492
def test_cast() -> None:
    """Check that casts give the requested dtype and failures are reported."""
    numbers = pl.Series("a", range(20))

    targets = (pl.Float32, pl.Float64, pl.Int32, pl.UInt32, pl.Datetime, pl.Date)
    for target in targets:
        assert numbers.cast(target).dtype == target

    # display failed values, GH#4706
    unparseable = pl.Series(["1", "2", "3", "4", "foobar"])
    with pytest.raises(InvalidOperationError, match="foobar"):
        unparseable.cast(int)
505
506
507
@pytest.mark.parametrize(
    "test_data",
    [
        [1, None, 2],
        ["abc", None, "xyz"],
        [None, datetime.now()],
        [[1, 2], [3, 4], None],
    ],
)
def test_to_pandas(test_data: list[Any]) -> None:
    """Round-trip data with one null through `to_pandas`, both backends."""
    series = pl.Series("s", test_data)
    pandas_series = series.to_pandas()

    assert series.name == pandas_series.name
    assert pandas_series.isnull().sum() == 1

    pandas_values: list[Any]
    if series.dtype == pl.List:
        # Non-null list elements expose `.tolist()`; nulls stay None.
        pandas_values = [
            (x.tolist() if x is not None else None) for x in pandas_series
        ]
    else:
        raw = pandas_series.replace({np.nan: None}).values.tolist()
        pandas_values = cast("list[Any]", raw)

    assert pandas_values == test_data

    try:
        arrow_backed = series.to_pandas(use_pyarrow_extension_array=True)
        assert series.name == arrow_backed.name
        assert arrow_backed.isnull().sum() == 1
        arrow_values = [
            x if x is not pd.NA else None for x in arrow_backed.tolist()
        ]
        assert arrow_values == test_data
    except ModuleNotFoundError:
        # Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.
        pass
541
542
543
def test_series_to_list() -> None:
    """Check `to_list` returns a Python list and keeps nulls as None."""
    as_list = pl.Series("a", range(20)).to_list()
    assert isinstance(as_list, list)
    assert len(as_list) == 20

    with_null = pl.Series("a", [1, None, 2])
    assert with_null.null_count() == 1
    assert with_null.to_list() == [1, None, 2]
552
553
554
@pytest.mark.may_fail_cloud  # reason: list.to_struct is an eager operation
def test_to_struct() -> None:
    """Check `list.to_struct` field naming: default, callable and explicit."""
    raw = pl.Series("nums", ["12 34", "56 78", "90 00"])
    number_lists = raw.str.extract_all(r"\d+")

    default_names = number_lists.list.to_struct().struct.fields
    assert default_names == ["field_0", "field_1"]

    generated = number_lists.list.to_struct(fields=lambda idx: f"n{idx:02}")
    assert generated.struct.fields == ["n00", "n01"]

    unnested = number_lists.list.to_struct(fields=["one", "two"]).struct.unnest()
    expected = pl.DataFrame({"one": ["12", "56", "90"], "two": ["34", "78", "00"]})
    assert_frame_equal(unnested, expected)
567
568
569
def test_to_struct_empty() -> None:
    """Unnesting a struct built with no fields should give a (0, 0) frame."""
    frame = pl.DataFrame({"y": [[], [], []]}, schema={"y": pl.List(pl.Int64)})
    no_field_struct = pl.col("y").list.to_struct(fields=[])
    unnested = frame.select(no_field_struct.struct.unnest())
    assert unnested.shape == (0, 0)
573
574
575
def test_sort() -> None:
    """Check ascending (default) and descending sort."""
    shuffled = pl.Series("a", [2, 1, 3])

    ascending = shuffled.sort()
    assert_series_equal(ascending, pl.Series("a", [1, 2, 3]))

    descending = shuffled.sort(descending=True)
    assert_series_equal(descending, pl.Series("a", [3, 2, 1]))
579
580
581
def test_rechunk() -> None:
    """After an append there are two chunks; rechunk brings that to one."""
    target = pl.Series("a", [1, 2, 3])
    extra = pl.Series("b", [4, 5, 6])
    target.append(extra)
    assert target.n_chunks() == 2

    # Checked on the returned Series first, then after an in-place call.
    rechunked_copy = target.rechunk(in_place=False)
    assert rechunked_copy.n_chunks() == 1
    target.rechunk(in_place=True)
    assert target.n_chunks() == 1
589
590
591
def test_indexing() -> None:
    """Check integer indexing for int, bool, string and float Series."""
    ints = pl.Series("a", [1, 2, None])
    assert ints[1] == 2
    assert ints[2] is None

    bools = pl.Series("b", [True, False])
    assert bools[0]
    assert not bools[1]

    strings = pl.Series("a", ["a", None])
    assert strings[0] == "a"
    assert strings[1] is None

    floats = pl.Series("a", [0.1, None])
    assert floats[0] == 0.1
    assert floats[1] is None
604
605
606
def test_arrow() -> None:
    """Check `to_arrow` / `from_arrow` for ints, floats, strings and nulls."""
    ints = pl.Series("a", [1, 2, 3, None])
    arrow_ints = ints.to_arrow()
    assert arrow_ints == pa.array([1, 2, 3, None])

    floats = pl.Series("b", [1.0, 2.0, 3.0, None])
    arrow_floats = floats.to_arrow()
    assert arrow_floats == pa.array([1.0, 2.0, 3.0, None])

    strings = pl.Series("c", ["A", "BB", "CCC", None])
    arrow_strings = strings.to_arrow()
    expected_strings = pa.array(["A", "BB", "CCC", None], type=pa.large_string())
    assert arrow_strings == expected_strings
    # A bare array has no name, so compare against the renamed Series.
    assert_series_equal(pl.from_arrow(arrow_strings), strings.rename(""))  # type: ignore[arg-type]

    # A column taken from an arrow table is compared with its name intact.
    column = strings.to_frame().to_arrow()["c"]
    assert isinstance(column, (pa.Array, pa.ChunkedArray))
    assert_series_equal(pl.from_arrow(column), strings)  # type: ignore[arg-type]
    assert_series_equal(pl.from_arrow(column, schema=["x"]), strings.rename("x"))  # type: ignore[arg-type]

    nulls = pl.Series("d", [None, None, None], pl.Null)
    arrow_nulls = nulls.to_arrow()
    assert arrow_nulls == pa.nulls(3)

    nested = cast(
        "pl.Series",
        pl.from_arrow(pa.array([["foo"], ["foo", "bar"]], pa.list_(pa.utf8()))),
    )
    assert nested.dtype == pl.List
634
635
636
def test_arrow_cat() -> None:
    """Build Categorical Series from dictionary-encoded pyarrow arrays."""
    # categorical dtype tests (including various forms of empty pyarrow array)
    typed_dictionary = pa.array(["foo", "bar"], pa.dictionary(pa.int32(), pa.utf8()))
    assert_series_equal(
        pl.Series("arr", ["foo", "bar"], pl.Categorical),
        pl.Series("arr", typed_dictionary),
    )

    encoded = pa.array(["xxx", "xxx", None, "yyy"]).dictionary_encode()
    empty_chunked = pa.chunked_array([], encoded.type)
    empty_typed = pa.array([], encoded.type)
    empty_untyped = pa.array([]).dictionary_encode()

    assert_series_equal(
        pl.Series("arr", ["xxx", "xxx", None, "yyy"], dtype=pl.Categorical),
        pl.Series("arr", encoded),
    )
    for empty in (empty_chunked, empty_typed):
        expected_empty = pl.Series("arr", [], dtype=pl.Categorical)
        assert_series_equal(expected_empty, pl.Series("arr", empty))

    # The untyped empty array is expected to come back with the Null dtype.
    expected_null = pl.Series("arr", [], dtype=pl.Null)
    assert_series_equal(expected_null, pl.Series("arr", empty_untyped))
656
657
658
def test_pycapsule_interface() -> None:
    """Read a Series into pyarrow through `PyCapsuleStreamHolder`."""
    series = pl.Series("a", [1, 2, 3, None])
    holder = PyCapsuleStreamHolder(series)
    chunked = pa.chunked_array(holder)
    combined = chunked.combine_chunks()
    assert combined == pa.array([1, 2, 3, None])
663
664
665
def test_get() -> None:
    """Check `__getitem__` with ints, slices, ranges, lists, Series, arrays."""
    signed_dtypes = (pl.Int8, pl.Int16, pl.Int32, pl.Int64)
    unsigned_dtypes = (pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)

    values = pl.Series("a", [1, 2, 3])
    positive = pl.Series("idxs", [2, 0, 1, 0], dtype=pl.Int8)
    mixed_sign = pl.Series(
        "neg_and_pos_idxs", [-2, 1, 0, -1, 2, -3], dtype=pl.Int8
    )
    no_idxs = pl.Series("idxs", [], dtype=pl.Int8)
    no_ints: list[int] = []

    assert values[0] == 1
    assert values[:2].to_list() == [1, 2]
    assert values[range(1)].to_list() == [1]
    assert values[range(0, 4, 2)].to_list() == [1, 3]
    assert values[:0].to_list() == []
    assert values[no_ints].to_list() == []
    assert values[mixed_sign.to_list()].to_list() == [2, 2, 1, 3, 3, 1]

    # Non-negative indices, as Series and as numpy arrays, for every int dtype.
    for idx_dtype in unsigned_dtypes + signed_dtypes:
        assert values[positive.cast(idx_dtype)].to_list() == [3, 1, 2, 1]
        assert values[positive.cast(idx_dtype).to_numpy()].to_list() == [3, 1, 2, 1]
        assert values[no_idxs.cast(idx_dtype)].to_list() == []
        assert values[no_idxs.cast(idx_dtype).to_numpy()].to_list() == []

    # Negative indices are only tried with the signed dtypes.
    for idx_dtype in signed_dtypes:
        picked = values[mixed_sign.cast(idx_dtype).to_numpy()]
        assert picked.to_list() == [2, 2, 1, 3, 3, 1]
698
699
700
def test_set() -> None:
    """Assign a scalar through a boolean Series mask."""
    flags = pl.Series("a", [True, False, True])
    selector = pl.Series("msk", [True, False, True])
    flags[selector] = False
    # Position 1 was already False, so all three end up False.
    assert_series_equal(flags, pl.Series("a", [False] * 3))
705
706
707
def test_set_value_as_list_fail() -> None:
    """Assigning a list of values works for ints but not strings or bools."""
    # only allowed for numerical physical types
    ints = pl.Series("a", [1, 2, 3])
    ints[[0, 2]] = [4, 5]
    assert ints.to_list() == [4, 2, 5]

    # for other types it is not allowed
    strings = pl.Series("a", ["a", "b", "c"])
    with pytest.raises(TypeError):
        strings[[0, 1]] = ["d", "e"]

    bools = pl.Series("a", [True, False, False])
    with pytest.raises(TypeError):
        bools[[0, 1]] = [True, False]
721
722
723
@pytest.mark.parametrize("key", [True, False, 1.0])
def test_set_invalid_key(key: Any) -> None:
    """A bool or float scalar key must be rejected with TypeError."""
    target = pl.Series("a", [1, 2, 3])
    with pytest.raises(TypeError):
        target[key] = 1
728
729
730
@pytest.mark.parametrize(
    "key",
    [
        pl.Series([False, True, True]),
        pl.Series([1, 2], dtype=UInt32),
        pl.Series([1, 2], dtype=UInt64),
    ],
)
def test_set_key_series(key: pl.Series) -> None:
    """Only UInt32/UInt64/bool are allowed."""
    target = pl.Series("a", [1, 2, 3])
    target[key] = 4
    # Every key above selects positions 1 and 2.
    expected = pl.Series("a", [1, 4, 4])
    assert_series_equal(target, expected)
743
744
745
def test_set_np_array_boolean_mask() -> None:
    """Assign a scalar through a numpy boolean mask."""
    target = pl.Series("a", [1, 2, 3])
    selector = np.array([True, False, True])
    target[selector] = 4
    expected = pl.Series("a", [4, 2, 4])
    assert_series_equal(target, expected)
750
751
752
@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.uint32, np.uint64])
def test_set_np_array(dtype: Any) -> None:
    """Assign a scalar through a numpy integer index array of each dtype."""
    target = pl.Series("a", [1, 2, 3])
    positions = np.array([0, 2], dtype=dtype)
    target[positions] = 4
    expected = pl.Series("a", [4, 2, 4])
    assert_series_equal(target, expected)
758
759
760
@pytest.mark.parametrize("idx", [[0, 2], (0, 2)])
def test_set_list_and_tuple(idx: list[int] | tuple[int]) -> None:
    """Assign a scalar through a list or tuple of integer positions."""
    target = pl.Series("a", [1, 2, 3])
    target[idx] = 4
    expected = pl.Series("a", [4, 2, 4])
    assert_series_equal(target, expected)
765
766
767
def test_init_nested_tuple() -> None:
    """Construct Series from flat and nested tuples."""
    flat = pl.Series("s", (1, 2, 3))
    assert flat.to_list() == [1, 2, 3]

    # A tuple holding one tuple, with an explicit List dtype.
    single_row = pl.Series("s", ((1, 2, 3),), dtype=pl.List(pl.UInt8))
    assert single_row.to_list() == [[1, 2, 3]]
    assert single_row.dtype == pl.List(pl.UInt8)

    two_rows = pl.Series("s", ((1, 2, 3), (1, 2, 3)), dtype=pl.List(pl.Int32))
    assert two_rows.to_list() == [[1, 2, 3], [1, 2, 3]]
    assert two_rows.dtype == pl.List(pl.Int32)
778
779
780
def test_fill_null() -> None:
    """Cover `fill_null` values and strategies on Series and DataFrames."""
    ints = pl.Series("a", [1, 2, None])
    forward_filled = ints.fill_null(strategy="forward")
    assert_series_equal(forward_filled, pl.Series("a", [1, 2, 2]))
    value_filled = ints.fill_null(14)
    assert_series_equal(value_filled, pl.Series("a", [1, 2, 14], dtype=Int64))

    floats = pl.Series("a", [0.0, 1.0, None, 2.0, None, 3.0])

    assert floats.fill_null(0).to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]
    float_expectations = {
        "zero": [0.0, 1.0, 0.0, 2.0, 0.0, 3.0],
        "max": [0.0, 1.0, 3.0, 2.0, 3.0, 3.0],
        "min": [0.0, 1.0, 0.0, 2.0, 0.0, 3.0],
        "one": [0.0, 1.0, 1.0, 2.0, 1.0, 3.0],
        "forward": [0.0, 1.0, 1.0, 2.0, 2.0, 3.0],
        "backward": [0.0, 1.0, 2.0, 2.0, 3.0, 3.0],
        "mean": [0.0, 1.0, 1.5, 2.0, 1.5, 3.0],
    }
    for strategy, expected in float_expectations.items():
        filled = floats.fill_null(strategy=strategy)  # type: ignore[arg-type]
        assert filled.to_list() == expected
    assert floats.forward_fill().to_list() == [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]
    assert floats.backward_fill().to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]

    strings = pl.Series("b", ["a", None, "c", None, "e"])
    string_expectations = {
        "min": ["a", "a", "c", "a", "e"],
        "max": ["a", "e", "c", "e", "e"],
        "zero": ["a", "", "c", "", "e"],
        "forward": ["a", "a", "c", "c", "e"],
        "backward": ["a", "c", "c", "e", "e"],
    }
    for strategy, expected in string_expectations.items():
        filled = strings.fill_null(strategy=strategy)  # type: ignore[arg-type]
        assert filled.to_list() == expected

    binaries = pl.Series("c", [b"a", None, b"c", None, b"e"])
    binary_expectations = {
        "min": [b"a", b"a", b"c", b"a", b"e"],
        "max": [b"a", b"e", b"c", b"e", b"e"],
        "zero": [b"a", b"", b"c", b"", b"e"],
        "forward": [b"a", b"a", b"c", b"c", b"e"],
        "backward": [b"a", b"c", b"c", b"e", b"e"],
    }
    for strategy, expected in binary_expectations.items():
        filled = binaries.fill_null(strategy=strategy)  # type: ignore[arg-type]
        assert filled.to_list() == expected

    mixed = pl.DataFrame(
        [
            pl.Series("i32", [1, 2, None], dtype=pl.Int32),
            pl.Series("i64", [1, 2, None], dtype=pl.Int64),
            pl.Series("f32", [1, 2, None], dtype=pl.Float32),
            pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),
            pl.Series("str", ["a", "b", None], dtype=pl.String),
            pl.Series("bool", [True, True, None], dtype=pl.Boolean),
        ]
    )

    # With matches_supertype=False the expected output leaves the i32 and f32
    # nulls in place; only the i64 column takes the integer fill.
    exact_match = (
        mixed.fill_null(0, matches_supertype=False)
        .fill_null("bar")
        .fill_null(False)
    )
    assert exact_match.to_dict(as_series=False) == {
        "i32": [1, 2, None],
        "i64": [1, 2, 0],
        "f32": [1.0, 2.0, None],
        "cat": ["a", "b", "bar"],
        "str": ["a", "b", "bar"],
        "bool": [True, True, False],
    }

    # With matches_supertype=True every numeric column is expected filled.
    supertype_match = (
        mixed.fill_null(0, matches_supertype=True)
        .fill_null("bar")
        .fill_null(False)
    )
    assert supertype_match.to_dict(as_series=False) == {
        "i32": [1, 2, 0],
        "i64": [1, 2, 0],
        "f32": [1.0, 2.0, 0.0],
        "cat": ["a", "b", "bar"],
        "str": ["a", "b", "bar"],
        "bool": [True, True, False],
    }

    sparse = pl.DataFrame({"a": [1, None, 2, None]})
    with_unsigned = sparse.with_columns(
        pl.col("a").cast(pl.UInt8).alias("u8"),
        pl.col("a").cast(pl.UInt16).alias("u16"),
        pl.col("a").cast(pl.UInt32).alias("u32"),
        pl.col("a").cast(pl.UInt64).alias("u64"),
    )
    out = with_unsigned.fill_null(3)

    assert out.to_dict(as_series=False) == {
        "a": [1, 3, 2, 3],
        "u8": [1, 3, 2, 3],
        "u16": [1, 3, 2, 3],
        "u32": [1, 3, 2, 3],
        "u64": [1, 3, 2, 3],
    }
    # Filling must not change any column's dtype.
    assert out.dtypes == [pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64]
861
862
863
def test_str_series_min_max_10674() -> None:
    """String min/max must skip nulls, also after sorting."""
    words = pl.Series("b", ["a", None, "c", None, "e"], dtype=pl.String)
    assert words.min() == "a"
    assert words.max() == "e"

    sorted_ascending = words.sort(descending=False)
    assert sorted_ascending.min() == "a"
    sorted_descending = words.sort(descending=True)
    assert sorted_descending.max() == "e"
869
870
871
def test_fill_nan() -> None:
    """Replace NaN with null and with a number."""
    not_a_number = float("nan")
    floats = pl.Series("a", [1.0, not_a_number, 2.0, not_a_number, 3.0])

    as_nulls = floats.fill_nan(None)
    assert_series_equal(as_nulls, pl.Series("a", [1.0, None, 2.0, None, 3.0]))

    as_zeros = floats.fill_nan(0)
    assert_series_equal(as_zeros, pl.Series("a", [1.0, 0.0, 2.0, 0.0, 3.0]))
876
877
878
def test_map_elements() -> None:
    """Check `map_elements` on ints, strings and temporal Series."""
    # The lambdas are kept exactly as written: the first two calls are
    # expected to raise PolarsInefficientMapWarning.
    with pytest.warns(PolarsInefficientMapWarning):
        ints = pl.Series("a", [1, 2, None])
        squared = ints.map_elements(lambda x: x**2, return_dtype=pl.Int64)
        assert list(squared) == [1, 4, None]

    with pytest.warns(PolarsInefficientMapWarning):
        words = pl.Series("a", ["foo", "bar", None])
        suffixed = words.map_elements(lambda x: x + "py", return_dtype=pl.String)
        assert list(suffixed) == ["foopy", "barpy", None]

    lengths = words.map_elements(lambda x: len(x), return_dtype=pl.Int32)
    assert list(lengths) == [3, 3, None]

    lengths = words.map_elements(lambda x: len(x))
    assert list(lengths) == [3, 3, None]

    # just check that it runs (somehow problem with conditional compilation)
    datetimes = pl.Series("a", [2, 2, 3]).cast(pl.Datetime)
    datetimes.map_elements(lambda x: x)
    dates = pl.Series("a", [2, 2, 3]).cast(pl.Date)
    dates.map_elements(lambda x: x)
900
901
902
def test_shape() -> None:
    """A three-element Series has shape `(3,)`."""
    shape = pl.Series([1, 2, 3]).shape
    assert shape == (3,)
905
906
907
@pytest.mark.parametrize("arrow_available", [True, False])
def test_create_list_series(arrow_available: bool, monkeypatch: Any) -> None:
    """Nested lists with nulls must round-trip with the pyarrow flag on or off."""
    monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", arrow_available)
    nested = [[1, 2], None, [None, 3]]
    series = pl.Series("", nested)
    assert series.to_list() == nested
913
914
915
def test_iter() -> None:
    """Iterating a Series yields its values in order."""
    series = pl.Series("", [1, 2, 3])

    iterator = iter(series)
    for expected in (1, 2, 3):
        assert next(iterator) == expected
    # The Series can also be consumed by builtins that take an iterable.
    assert sum(series) == 6
923
924
925
def test_empty() -> None:
    """Check dtype and emptiness for several ways to build an empty Series."""
    typed = pl.Series(dtype=pl.Int8)
    assert typed.dtype == pl.Int8
    assert typed.is_empty()

    bare = pl.Series()
    assert bare.dtype == pl.Null
    assert bare.is_empty()

    named = pl.Series("name", [])
    assert named.dtype == pl.Null
    assert named.is_empty()

    from_tuple = pl.Series(values=(), dtype=pl.Int8)
    assert from_tuple.dtype == pl.Int8
    assert from_tuple.is_empty()

    assert_series_equal(pl.Series(), pl.Series())
    # Different dtypes only compare equal because check_dtypes is off.
    assert_series_equal(
        pl.Series(dtype=pl.Int32), pl.Series(dtype=pl.Int64), check_dtypes=False
    )

    # Truth-testing a Series must raise rather than return a bool.
    with pytest.raises(TypeError, match="ambiguous"):
        not pl.Series()
949
950
951
def test_round() -> None:
    """Round floats to two decimals and with the default argument."""
    floats = pl.Series("f", [1.003, 2.003])

    two_decimals = floats.round(2)
    assert two_decimals.to_list() == [1.00, 2.00]

    default_rounding = floats.round()
    assert default_rounding.to_list() == [1.0, 2.0]
958
959
960
def test_round_int() -> None:
    """Rounding an integer Series must give back an equal Series."""
    ints = pl.Series([1, 2, 3])
    rounded = ints.round()
    assert_series_equal(ints, rounded)
963
964
965
@pytest.mark.parametrize(
    ("series", "digits", "expected_result"),
    [
        pytest.param(pl.Series([1.234, 0.1234]), 2, pl.Series([1.2, 0.12]), id="f64"),
        pytest.param(
            pl.Series([1.234, 0.1234]).cast(pl.Float32),
            2,
            pl.Series([1.2, 0.12]).cast(pl.Float32),
            id="f32",
        ),
        pytest.param(pl.Series([123400, 1234]), 2, pl.Series([120000, 1200]), id="i64"),
        pytest.param(
            pl.Series([123400, 1234]).cast(pl.Int32),
            2,
            pl.Series([120000, 1200]).cast(pl.Int32),
            id="i32",
        ),
        pytest.param(
            pl.Series([0.0]), 2, pl.Series([0.0]), id="0 should remain the same"
        ),
    ],
)
def test_round_sig_figs(
    series: pl.Series, digits: int, expected_result: pl.Series
) -> None:
    """Round to `digits` significant figures and compare with the expectation."""
    rounded = series.round_sig_figs(digits=digits)
    assert_series_equal(rounded, expected_result)
992
993
994
def test_round_sig_figs_raises_exc() -> None:
    """Check that `round_sig_figs(digits=0)` raises `InvalidOperationError`."""
    floats = pl.Series([1.234, 0.1234])
    with pytest.raises(pl.exceptions.InvalidOperationError):
        floats.round_sig_figs(digits=0)
997
998
999
def test_apply_list_out() -> None:
    """Check `map_elements` when the mapped function returns a Series."""
    counts = pl.Series("count", [3, 2, 2])
    # each value `n` is replaced by `n` repeated `n` times
    repeated = counts.map_elements(lambda n: pl.repeat(n, n, eager=True))

    for idx, want in enumerate([[3, 3, 3], [2, 2], [2, 2]]):
        assert repeated[idx].to_list() == want
1005
1006
1007
def test_reinterpret() -> None:
    """Check UInt64 -> Int64 reinterpretation on a Series and via an expression."""
    unsigned = pl.Series("a", [1, 1, 2], dtype=pl.UInt64)
    assert unsigned.reinterpret(signed=True).dtype == pl.Int64

    frame = pl.DataFrame([unsigned])
    reinterpreted = frame.select([pl.col("a").reinterpret(signed=True)])
    assert reinterpreted["a"].dtype == pl.Int64
1012
1013
1014
def test_mode() -> None:
    """Check `mode` for several dtypes, sorted input and the expression API."""
    s = pl.Series("a", [1, 1, 2])
    assert s.mode().to_list() == [1]
    assert s.set_sorted().mode().to_list() == [1]

    frame = pl.DataFrame([s])
    assert frame.select([pl.col("a").mode()])["a"].to_list() == [1]

    categorical = pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical)
    assert categorical.mode().item() == "bar"
    assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.0
    assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b"

    # sorted data: every value occurs exactly once, so all of them are modes
    assert set(pl.int_range(0, 3, eager=True).mode().to_list()) == {0, 1, 2}
1030
1031
1032
def test_diff() -> None:
    """Check forward `diff` with the default and the "drop" null behaviour."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    deltas = [1, 1, -1, 0, 1, -3]

    # by default the first slot has no predecessor and is null
    assert_series_equal(s.diff(), pl.Series("a", [None, *deltas]))
    assert_series_equal(s.diff(null_behavior="drop"), pl.Series("a", deltas))
1043
1044
1045
def test_diff_negative() -> None:
    """Check `diff(-1)` with the default and the "drop" null behaviour."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    deltas = [-1, -1, 1, 0, -1, 3]

    # with a negative shift the null lands in the last slot
    assert_series_equal(s.diff(-1), pl.Series("a", [*deltas, None]))
    assert_series_equal(s.diff(-1, null_behavior="drop"), pl.Series("a", deltas))
1056
1057
1058
def test_pct_change() -> None:
    """Check `pct_change` with int, Series and negative period arguments."""
    doubling = pl.Series("a", [1, 2, 4, 8, 16, 32, 64])
    expected = pl.Series("a", [None, None, 3.0, 3.0, 3.0, 3.0, 3.0])

    assert_series_equal(doubling.pct_change(2), expected)
    assert_series_equal(doubling.pct_change(pl.Series([2])), expected)

    # negative
    backward = pl.Series(range(5)).pct_change(-1)
    assert backward.to_list() == [-1.0, -0.5, -0.3333333333333333, -0.25, None]
1071
1072
1073
def test_skew() -> None:
    """Check biased and unbiased `skew` on a Series and via an expression."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    biased, unbiased = -0.5953924651018018, -0.7717168360221258

    assert s.skew(bias=True) == pytest.approx(biased)
    assert s.skew(bias=False) == pytest.approx(unbiased)

    via_expr = pl.DataFrame([s]).select(pl.col("a").skew(bias=False))["a"][0]
    assert np.isclose(via_expr, unbiased)
1083
1084
1085
def test_kurtosis() -> None:
    """Check `kurtosis` on a Series and via an expression."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    expected = -0.6406250000000004

    assert s.kurtosis() == pytest.approx(expected)

    via_expr = pl.DataFrame([s]).select(pl.col("a").kurtosis())["a"][0]
    assert np.isclose(via_expr, expected)
1092
1093
1094
def test_sqrt() -> None:
    """Check `sqrt` on a Series and via an expression against numpy."""
    s = pl.Series("a", [1, 2])
    expected = pl.Series("a", [1.0, np.sqrt(2)])

    assert_series_equal(s.sqrt(), expected)
    assert_series_equal(pl.DataFrame([s]).select(pl.col("a").sqrt())["a"], expected)
1101
1102
1103
def test_cbrt() -> None:
    """Check `cbrt` on a Series and via an expression against numpy."""
    s = pl.Series("a", [1, 2])
    expected = pl.Series("a", [1.0, np.cbrt(2)])

    assert_series_equal(s.cbrt(), expected)
    assert_series_equal(pl.DataFrame([s]).select(pl.col("a").cbrt())["a"], expected)
1110
1111
1112
def test_range() -> None:
    """Check `range` objects as indexers and as Series values."""
    s1 = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    assert_series_equal(s1[2:5], s1[range(2, 5)])

    ranges = [range(-2, 1), range(3), range(2, 8, 2)]
    as_lists = [[-2, -1, 0], [0, 1, 2], [2, 4, 6]]

    s2 = pl.Series("b", ranges, dtype=pl.List(pl.Int8))
    assert s2.to_list() == as_lists
    assert s2.dtype == pl.List(pl.Int8)
    assert s2.name == "b"

    # a generator yielding the list of ranges three times
    s3 = pl.Series("c", (ranges for _ in range(3)))
    assert s3.to_list() == [as_lists] * 3
    assert s3.dtype == pl.List(pl.List(pl.Int64))

    df = pl.DataFrame([s1])
    assert_frame_equal(df[2:5], df[range(2, 5)])
1133
1134
1135
def test_strict_cast() -> None:
    """Check that a strict cast of 2**16 to Int16 raises on Series and in select."""
    too_big = 2**16

    with pytest.raises(InvalidOperationError):
        pl.Series("a", [too_big]).cast(dtype=pl.Int16, strict=True)
    with pytest.raises(InvalidOperationError):
        pl.DataFrame({"a": [too_big]}).select(
            [pl.col("a").cast(pl.Int16, strict=True)]
        )
1140
1141
1142
def test_floor_divide() -> None:
    """Check `//` on a Series and on the equivalent expression."""
    s = pl.Series("a", [1, 2, 3])
    expected = pl.Series("a", [0, 1, 1])

    assert_series_equal(s // 2, expected)
    assert_series_equal(pl.DataFrame([s]).select(pl.col("a") // 2)["a"], expected)
1148
1149
1150
def test_true_divide() -> None:
    """Check `/` on Series and expressions, including the reflected form."""
    s = pl.Series("a", [1, 2])
    frame = pl.DataFrame([s])
    halves = pl.Series("a", [0.5, 1.0])

    assert_series_equal(s / 2, halves)
    assert_series_equal(frame.select(pl.col("a") / 2)["a"], halves)

    # rtruediv
    assert_series_equal(
        frame.select(2 / pl.col("a"))["literal"],
        pl.Series("literal", [2.0, 1.0]),
    )

    # https://github.com/pola-rs/polars/issues/1369
    vals = [3000000000, 2, 3]
    assert_series_equal(pl.Series(vals) / 1, pl.Series(vals, dtype=Float64))
    assert_series_equal(
        pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"],
        pl.Series("a", vals, dtype=Float64),
    )
1171
1172
1173
def test_bitwise() -> None:
    """Check `&`, `|`, `^` on Series and expressions; `and`/`or` must raise."""
    a = pl.Series("a", [1, 2, 3])
    b = pl.Series("b", [3, 4, 5])
    and_vals, or_vals, xor_vals = [1, 0, 1], [3, 6, 7], [2, 6, 6]

    assert_series_equal(a & b, pl.Series("a", and_vals))
    assert_series_equal(a | b, pl.Series("a", or_vals))
    assert_series_equal(a ^ b, pl.Series("a", xor_vals))

    lhs, rhs = pl.col("a"), pl.col("b")
    out = pl.DataFrame([a, b]).select(
        (lhs & rhs).alias("and"),
        (lhs | rhs).alias("or"),
        (lhs ^ rhs).alias("xor"),
    )
    for name, vals in (("and", and_vals), ("or", or_vals), ("xor", xor_vals)):
        assert_series_equal(out[name], pl.Series(name, vals))

    # ensure mistaken use of logical 'and'/'or' raises an exception
    with pytest.raises(TypeError, match="ambiguous"):
        a and b  # type: ignore[redundant-expr]

    with pytest.raises(TypeError, match="ambiguous"):
        a or b  # type: ignore[redundant-expr]
1196
1197
1198
def test_from_generator_or_iterable() -> None:
    """Check Series construction from generators and custom iterables."""

    # generator function
    def gen(n: int) -> Iterator[int]:
        yield from range(n)

    # iterable object
    class Data:
        def __init__(self, n: int) -> None:
            self._n = n

        def __iter__(self) -> Iterator[int]:
            yield from gen(self._n)

    expected = pl.Series("s", range(10))
    assert expected.dtype == pl.Int64

    from_iterables = (
        pl.Series("s", values=gen(10)),
        pl.Series("s", values=Data(10)),
        pl.Series("s", values=(x for x in gen(10))),
    )
    for built in from_iterables:
        assert_series_equal(expected, built)

    # test 'iterable_to_pyseries' directly to validate 'chunk_size' behaviour
    pyseries = (
        iterable_to_pyseries("s", gen(10), dtype=pl.UInt8),
        iterable_to_pyseries("s", gen(10), dtype=pl.UInt8, chunk_size=3),
        iterable_to_pyseries("s", Data(10), dtype=pl.UInt8, chunk_size=6),
    )

    expected = pl.Series("s", range(10), dtype=pl.UInt8)
    assert expected.dtype == pl.UInt8

    for ps in pyseries:
        # wrap the raw PySeries in a Series so it can be compared
        wrapped = pl.Series("s")
        wrapped._s = ps
        assert_series_equal(expected, wrapped)

    # empty generator
    assert_series_equal(pl.Series("s", []), pl.Series("s", values=gen(0)))
1236
1237
1238
def test_from_sequences(monkeypatch: Any) -> None:
    """Check that nested sequences build the same Series with pyarrow on or off."""
    # test int, str, bool, flt
    values = [
        [[1], [None, 3]],
        [["foo"], [None, "bar"]],
        [[True], [None, False]],
        [[1.0], [None, 3.0]],
    ]

    for vals in values:
        built = []
        # construct once with the flag off, then once with it on
        for available in (False, True):
            monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", available)
            built.append(pl.Series("a", vals))
        without_arrow, with_arrow = built
        assert_series_equal(without_arrow, with_arrow)
        assert without_arrow.to_list() == vals
1254
1255
1256
def test_comparisons_int_series_to_float() -> None:
    """Check arithmetic and comparisons between an int Series and float scalars."""
    srs_int = pl.Series([1, 2, 3, 4])

    # (result of the operation, expected values)
    checks: list[tuple[pl.Series, list[Any]]] = [
        (srs_int - 1.0, [0.0, 1.0, 2.0, 3.0]),
        (srs_int + 1.0, [2.0, 3.0, 4.0, 5.0]),
        (srs_int * 2.0, [2.0, 4.0, 6.0, 8.0]),
        (srs_int / 2.0, [0.5, 1.0, 1.5, 2.0]),
        (srs_int % 2.0, [1.0, 0.0, 1.0, 0.0]),
        (4.0 % srs_int, [0.0, 0.0, 1.0, 0.0]),
        (srs_int // 2.0, [0.0, 1.0, 1.0, 2.0]),
        (srs_int < 3.0, [True, True, False, False]),
        (srs_int <= 3.0, [True, True, True, False]),
        (srs_int > 3.0, [False, False, False, True]),
        (srs_int >= 3.0, [False, False, True, True]),
        (srs_int == 3.0, [False, False, True, False]),
        (srs_int - True, [0, 1, 2, 3]),
    ]
    for result, expected in checks:
        assert_series_equal(result, pl.Series(expected))
1273
1274
1275
def test_comparisons_int_series_to_float_scalar() -> None:
    """Check `<` and `>` between an int Series and a non-integral float."""
    srs_int = pl.Series([1, 2, 3, 4])
    below = [True, False, False, False]

    assert_series_equal(srs_int < 1.5, pl.Series(below))
    # no element equals 1.5, so `>` is the exact complement of `<`
    assert_series_equal(srs_int > 1.5, pl.Series([not flag for flag in below]))
1280
1281
1282
def test_comparisons_datetime_series_to_date_scalar() -> None:
    """Check `<` and `>` between a Series of dates and a datetime scalar."""
    srs_date = pl.Series([date(2023, 1, day) for day in (1, 2, 3)])
    noon = datetime(2023, 1, 1, 12, 0, 0)

    assert_series_equal(srs_date < noon, pl.Series([True, False, False]))
    assert_series_equal(srs_date > noon, pl.Series([False, True, True]))
1288
1289
1290
def test_comparisons_float_series_to_int() -> None:
    """Check arithmetic and comparisons between a float Series and int scalars."""
    srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])

    # (result of the operation, expected values)
    checks: list[tuple[pl.Series, list[Any]]] = [
        (srs_float - 1, [0.0, 1.0, 2.0, 3.0]),
        (srs_float + 1, [2.0, 3.0, 4.0, 5.0]),
        (srs_float * 2, [2.0, 4.0, 6.0, 8.0]),
        (srs_float / 2, [0.5, 1.0, 1.5, 2.0]),
        (srs_float % 2, [1.0, 0.0, 1.0, 0.0]),
        (4 % srs_float, [0.0, 0.0, 1.0, 0.0]),
        (srs_float // 2, [0.0, 1.0, 1.0, 2.0]),
        (srs_float < 3, [True, True, False, False]),
        (srs_float <= 3, [True, True, True, False]),
        (srs_float > 3, [False, False, False, True]),
        (srs_float >= 3, [False, False, True, True]),
        (srs_float == 3, [False, False, True, False]),
        (srs_float - True, [0.0, 1.0, 2.0, 3.0]),
    ]
    for result, expected in checks:
        assert_series_equal(result, pl.Series(expected))
1307
1308
1309
def test_comparisons_bool_series_to_int() -> None:
    """Check which scalar operations a Boolean Series accepts or rejects."""
    from operator import ge, gt, le, lt

    srs_bool = pl.Series([True, False])

    # (native bool comparison should work...)
    for t, f in ((True, False), (False, True)):
        assert list(srs_bool == t) == list(srs_bool != f) == [t, f]

    # TODO: do we want this to work?
    assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))

    # the same message is expected from `-`, `+`, `%` and `*`
    arithmetic_error = (
        r"cannot do arithmetic with Series of dtype: Boolean"
        r" and argument of type: 'bool'"
    )
    with pytest.raises(TypeError, match=arithmetic_error):
        srs_bool - 1
    with pytest.raises(TypeError, match=arithmetic_error):
        srs_bool + 1
    with pytest.raises(TypeError, match=arithmetic_error):
        srs_bool % 2
    with pytest.raises(TypeError, match=arithmetic_error):
        srs_bool * 1

    for op in (ge, gt, le, lt):
        # "ge" -> "gt_eq" and "le" -> "lt_eq"; "gt"/"lt" contain no "e"
        op_str = op.__name__.replace("e", "t_eq")
        unsupported = rf"Series of type Boolean does not have {op_str} operator"
        for scalar in (0, 1.0, True, False):
            with pytest.raises(NotImplementedError, match=unsupported):
                op(srs_bool, scalar)
1345
1346
1347
@pytest.mark.parametrize(
    ("values", "compare_with", "compares_equal"),
    [
        (
            [date(1999, 12, 31), date(2021, 1, 31)],
            date(2021, 1, 31),
            [False, True],
        ),
        (
            [datetime(2021, 1, 1, 12, 0, 0), datetime(2021, 1, 2, 12, 0, 0)],
            datetime(2021, 1, 1, 12, 0, 0),
            [True, False],
        ),
        (
            [timedelta(days=1), timedelta(days=2)],
            timedelta(days=1),
            [True, False],
        ),
    ],
)
def test_temporal_comparison(
    values: list[Any], compare_with: Any, compares_equal: list[bool]
) -> None:
    """Check `==` between a temporal Series and a scalar of the same kind."""
    expected = pl.Series(compares_equal, dtype=pl.Boolean)
    assert_series_equal(pl.Series(values) == compare_with, expected)
1374
1375
1376
def test_to_dummies() -> None:
    """Check that `to_dummies` yields one UInt8 indicator column per value."""
    columns = {"a_1": [1, 0, 0], "a_2": [0, 1, 0], "a_3": [0, 0, 1]}
    expected = pl.DataFrame(columns, schema=dict.fromkeys(columns, pl.UInt8))

    assert_frame_equal(pl.Series("a", [1, 2, 3]).to_dummies(), expected)
1384
1385
1386
def test_to_dummies_drop_first() -> None:
    """Check that `drop_first=True` omits the `a_1` indicator column."""
    columns = {"a_2": [0, 1, 0], "a_3": [0, 0, 1]}
    expected = pl.DataFrame(columns, schema=dict.fromkeys(columns, pl.UInt8))

    dummies = pl.Series("a", [1, 2, 3]).to_dummies(drop_first=True)
    assert_frame_equal(dummies, expected)
1394
1395
1396
def test_to_dummies_drop_nulls() -> None:
    """Check that `drop_nulls=True` produces no indicator column for nulls."""
    columns = {"a_1": [1, 0, 0], "a_2": [0, 1, 0]}
    expected = pl.DataFrame(columns, schema=dict.fromkeys(columns, pl.UInt8))

    dummies = pl.Series("a", [1, 2, None]).to_dummies(drop_nulls=True)
    assert_frame_equal(dummies, expected)
1404
1405
1406
def test_to_dummies_null_clash_19096() -> None:
    """Check that a null and the string "null" raise a duplicate-column error."""
    clashing = pl.Series([None, "null"])
    with pytest.raises(
        DuplicateError, match="column with name '_null' has more than one occurrence"
    ):
        clashing.to_dummies()
1411
1412
1413
def test_chunk_lengths() -> None:
    """Check chunk count and chunk lengths of a freshly built Series."""
    s = pl.Series("a", [1, 2, 2, 3])
    # this is a Series with one chunk, of length 4
    assert (s.n_chunks(), s.chunk_lengths()) == (1, [4])
1418
1419
1420
def test_limit() -> None:
    """Check that `limit(2)` keeps the first two values."""
    first_two = pl.Series("a", [1, 2, 3]).limit(2)
    assert_series_equal(first_two, pl.Series("a", [1, 2]))
1423
1424
1425
def test_filter() -> None:
    """Check `filter` with Series, list and numpy masks; non-bool masks raise."""
    s = pl.Series("a", [1, 2, 3])
    keep = [True, False, True]
    expected = pl.Series("a", [1, 3])

    assert_series_equal(s.filter(pl.Series("", keep)), expected)
    assert_series_equal(s.filter(keep), expected)
    assert_series_equal(s.filter(np.array(keep)), expected)

    # an integer numpy array is not accepted as a mask
    with pytest.raises(RuntimeError, match="Expected a boolean mask"):
        s.filter(np.array([1, 0, 1]))
1435
1436
1437
def test_gather_every() -> None:
    """Check `gather_every` with and without an offset."""
    s = pl.Series("a", [1, 2, 3, 4])

    from_start = s.gather_every(2)
    from_second = s.gather_every(2, offset=1)

    assert_series_equal(from_start, pl.Series("a", [1, 3]))
    assert_series_equal(from_second, pl.Series("a", [2, 4]))
1441
1442
1443
def test_arg_sort() -> None:
    """Check ascending and descending `arg_sort` indices (UInt32)."""
    s = pl.Series("a", [5, 3, 4, 1, 2])
    ascending_idx = [3, 4, 1, 2, 0]
    descending_idx = [0, 2, 1, 4, 3]

    assert_series_equal(s.arg_sort(), pl.Series("a", ascending_idx, dtype=UInt32))
    assert_series_equal(
        s.arg_sort(descending=True), pl.Series("a", descending_idx, dtype=UInt32)
    )
1451
1452
1453
@pytest.mark.parametrize(
    ("series", "argmin", "argmax"),
    [
        # Numeric
        (pl.Series([5, 3, 4, 1, 2]), 3, 0),
        (pl.Series([None, 5, 1]), 2, 1),
        # Boolean
        (pl.Series([True, False]), 1, 0),
        (pl.Series([True, True]), 0, 0),
        (pl.Series([False, False]), 0, 0),
        (pl.Series([None, True, False, True]), 2, 1),
        (pl.Series([None, True, True]), 1, 1),
        (pl.Series([None, False, False]), 1, 1),
        # String
        (pl.Series(["a", "c", "b"]), 0, 1),
        (pl.Series([None, "a", None, "b"]), 1, 3),
        # Categorical
        (pl.Series(["c", "b", "a"], dtype=pl.Categorical(ordering="lexical")), 2, 0),
        (pl.Series("s", [None, "c", "b", None, "a"], pl.Categorical("lexical")), 4, 1),
    ],
)
def test_arg_min_arg_max(series: pl.Series, argmin: int, argmax: int) -> None:
    """Check `arg_min` / `arg_max` against the expected positions."""
    for method, want in (("arg_min", argmin), ("arg_max", argmax)):
        got = getattr(series, method)()
        assert got == want, f"values: {series.to_list()}, expected {want} got {got}"
1481
1482
1483
@pytest.mark.parametrize(
    "series",
    [
        # All nulls
        pl.Series([None, None], dtype=pl.Int32),
        pl.Series([None, None], dtype=pl.Boolean),
        pl.Series([None, None], dtype=pl.String),
        pl.Series([None, None], dtype=pl.Categorical),
        pl.Series([None, None], dtype=pl.Categorical(ordering="lexical")),
        # Empty Series
        pl.Series([], dtype=pl.Int32),
        pl.Series([], dtype=pl.Boolean),
        pl.Series([], dtype=pl.String),
        pl.Series([], dtype=pl.Categorical),
    ],
)
def test_arg_min_arg_max_all_nulls_or_empty(series: pl.Series) -> None:
    """Check that `arg_min` / `arg_max` return None without any valid value."""
    lowest, highest = series.arg_min(), series.arg_max()
    assert lowest is None
    assert highest is None
1502
1503
1504
def test_arg_min_and_arg_max_sorted() -> None:
    """Check `arg_min` / `arg_max` on Series carrying a sorted flag."""
    asc_flags = {"SORTED_ASC": True, "SORTED_DESC": False}
    desc_flags = {"SORTED_ASC": False, "SORTED_DESC": True}

    # numerical series first, then str series; ascending before descending
    for ascending, descending in (
        ([None, 1, 2, 3, 4, 5], [None, 5, 4, 3, 2, 1]),
        ([None, "a", "b", "c", "d", "e"], [None, "e", "d", "c", "b", "a"]),
    ):
        s = pl.Series(ascending)
        s.sort(in_place=True)  # set ascending sorted flag
        assert s.flags == asc_flags
        assert s.arg_min() == 1
        assert s.arg_max() == 5

        s = pl.Series(descending)
        s.sort(descending=True, in_place=True)  # set descending sorted flag
        assert s.flags == desc_flags
        assert s.arg_min() == 5
        assert s.arg_max() == 1
1528
1529
1530
def test_is_null_is_not_null() -> None:
    """Check that `is_null` and `is_not_null` produce complementary masks."""
    s = pl.Series("a", [1.0, 2.0, 3.0, None])
    null_mask = [False, False, False, True]

    assert_series_equal(s.is_null(), pl.Series("a", null_mask))
    assert_series_equal(s.is_not_null(), pl.Series("a", [not m for m in null_mask]))
1534
1535
1536
def test_is_finite_is_infinite() -> None:
    """Check `is_finite` / `is_infinite` on a Series containing infinity."""
    s = pl.Series("a", [1.0, 2.0, np.inf])
    finite_mask = [True, True, False]

    assert_series_equal(s.is_finite(), pl.Series("a", finite_mask))
    assert_series_equal(s.is_infinite(), pl.Series("a", [not m for m in finite_mask]))
1540
1541
1542
@pytest.mark.parametrize("float_type", [pl.Float32, pl.Float64])
def test_is_nan_is_not_nan(float_type: PolarsDataType) -> None:
    """Check the NaN predicates, `fill_nan` and `drop_nans`; nulls stay null."""
    s = pl.Series([1.0, np.nan, None], dtype=float_type)

    def typed(values: list[float | None]) -> pl.Series:
        # expected Series in the parametrized float dtype
        return pl.Series(values, dtype=float_type)

    assert_series_equal(s.is_nan(), pl.Series([False, True, None]))
    assert_series_equal(s.is_not_nan(), pl.Series([True, False, None]))
    assert_series_equal(s.fill_nan(2.0), typed([1.0, 2.0, None]))
    assert_series_equal(s.drop_nans(), typed([1.0, None]))
1550
1551
1552
def test_float_methods_on_ints() -> None:
    """Check the float predicates on an Int32 Series with a null."""
    # these float-specific methods work on non-float numeric types
    s = pl.Series([1, None], dtype=pl.Int32)

    # (method name, result for the non-null element); the null stays null
    for method, flag in (
        ("is_finite", True),
        ("is_infinite", False),
        ("is_nan", False),
        ("is_not_nan", True),
    ):
        assert_series_equal(getattr(s, method)(), pl.Series([flag, None]))
1559
1560
1561
def test_dot() -> None:
    """Check `dot` and `@` against numpy, plus the length-mismatch error."""
    s1 = pl.Series("a", [1, 2, 3])
    s2 = pl.Series("b", [4.0, 5.0, 6.0])
    expected = 32

    # numpy reference
    assert np.array([1, 2, 3]) @ np.array([4, 5, 6]) == expected

    results = (
        s1.dot(s2),
        s1 @ s2,
        [1, 2, 3] @ s2,
        s1 @ np.array([4, 5, 6]),
    )
    for dot_result in results:
        assert dot_result == expected

    with pytest.raises(ShapeError, match="length mismatch"):
        s1 @ [4, 5, 6, 7, 8]
1577
1578
1579
@pytest.mark.parametrize(
    "dtype",
    [pl.Int8, pl.Int16, pl.Int32, pl.Float32, pl.Float64],
)
def test_peak_max_peak_min(dtype: pl.DataType) -> None:
    """Check `peak_min` / `peak_max` masks for several numeric dtypes."""
    s = pl.Series("a", [4, 1, 3, 2, 5], dtype=dtype)
    minima = pl.Series("a", [False, True, False, True, False])
    maxima = pl.Series("a", [True, False, True, False, True])

    assert_series_equal(s.peak_min(), minima)
    assert_series_equal(s.peak_max(), maxima)
1593
1594
1595
def test_peak_max_peak_min_bool() -> None:
    """Check `peak_min` / `peak_max` masks on a Boolean Series."""
    s = pl.Series("a", [False, True, False, True, True, False], dtype=pl.Boolean)
    minima = pl.Series("a", [False, False, True, False, False, False])
    maxima = pl.Series("a", [False, True, False, False, False, False])

    assert_series_equal(s.peak_min(), minima)
    assert_series_equal(s.peak_max(), maxima)
1604
1605
1606
def test_shrink_to_fit() -> None:
    """Check the identity of the object returned by `shrink_to_fit`."""
    values = [4, 1, 3, 2, 5]

    # in-place: the very same object is returned
    in_place = pl.Series("a", values)
    assert in_place.shrink_to_fit(in_place=True) is in_place

    # not in-place: a different object is returned
    copied = pl.Series("a", values)
    assert copied.shrink_to_fit(in_place=False) is not copied
1614
1615
1616
@pytest.mark.parametrize("unit", ["ns", "us", "ms"])
def test_cast_datetime_to_time(unit: TimeUnit) -> None:
    """Check casting Datetime to Time for each time unit."""
    a = pl.Series(
        "a",
        [
            datetime(2022, 9, 7, 0, 0),
            datetime(2022, 9, 6, 12, 0),
            datetime(2022, 9, 7, 23, 59, 59),
            datetime(2022, 9, 7, 23, 59, 59, 201),
        ],
        dtype=Datetime(unit),
    )
    # NOTE: microseconds are lost for `unit=ms`
    last = time(23, 59, 59) if unit == "ms" else time(23, 59, 59, 201)
    expected = pl.Series("a", [time(0, 0), time(12, 0), time(23, 59, 59), last])

    assert_series_equal(a.cast(Time), expected)
1640
1641
1642
def test_init_categorical() -> None:
    """Check that `dtype=pl.Categorical` matches casting from String."""
    for values in [[None], ["foo", "bar"], [None, "foo", "bar"]]:
        via_cast = pl.Series("a", values, dtype=pl.String).cast(pl.Categorical)
        direct = pl.Series("a", values, dtype=pl.Categorical)
        assert_series_equal(direct, via_cast)
1647
1648
1649
def test_iter_nested_list() -> None:
    """Check that iterating a list Series yields a Series per row."""
    first, second = pl.Series([1, 2]), pl.Series([3, 4])

    elems = list(pl.Series("s", [[1, 2], [3, 4]]))
    assert_series_equal(elems[0], first)
    assert_series_equal(elems[1], second)

    rev_elems = list(reversed(pl.Series("s", [[1, 2], [3, 4]])))
    assert_series_equal(rev_elems[0], second)
    assert_series_equal(rev_elems[1], first)
1657
1658
1659
def test_iter_nested_struct() -> None:
    """Check that iterating a struct Series yields a dict per row."""
    # note: this feels inconsistent with the above test for nested list, but
    # let's ensure the behaviour is codified before potentially modifying...
    rows = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]

    elems = list(pl.Series("s", rows))
    assert elems[0] == {"a": 1, "b": 2}
    assert elems[1] == {"a": 3, "b": 4}

    rev_elems = list(reversed(pl.Series("s", rows)))
    assert rev_elems[0] == {"a": 3, "b": 4}
    assert rev_elems[1] == {"a": 1, "b": 2}
1669
1670
1671
@pytest.mark.parametrize(
    "dtype",
    [
        pl.UInt8,
        pl.Float32,
        pl.Int32,
        pl.Boolean,
        pl.List(pl.String),
        pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]),
    ],
)
def test_nested_list_types_preserved(dtype: pl.DataType) -> None:
    """Check that empty inner Series keep their dtype inside a list Series."""
    srs = pl.Series([pl.Series([], dtype=dtype) for _ in range(5)])
    assert all(inner.dtype == dtype for inner in srs)
1686
1687
1688
def test_to_physical() -> None:
    """Check `to_physical` for plain, temporal, categorical and nested dtypes."""
    # casting an int result in an int
    ints = pl.Series("a", [1, 2, 3])
    assert_series_equal(ints.to_physical(), ints)

    # casting a date results in an Int32
    dates = pl.Series("a", [date(2020, 1, 1)] * 3)
    assert_series_equal(
        dates.to_physical(), pl.Series("a", [18262] * 3, dtype=Int32)
    )

    # the remaining cases only check the resulting dtype
    small_enum = pl.Enum(["cat1"])
    dtype_cases = [
        # casting a categorical results in a UInt32
        (pl.Series(["cat1"]).cast(pl.Categorical), pl.UInt32),
        # casting a small enum results in a UInt8
        (pl.Series(["cat1"]).cast(small_enum), pl.UInt8),
        # casting a List(Categorical) results in a List(UInt32)
        (pl.Series([["cat1"]]).cast(pl.List(pl.Categorical)), pl.List(pl.UInt32)),
        # casting a List(Enum) with a small enum results in a List(UInt8)
        (pl.Series(["cat1"]).cast(pl.List(small_enum)), pl.List(pl.UInt8)),
    ]
    for logical, physical_dtype in dtype_cases:
        assert logical.to_physical().dtype == physical_dtype
1713
1714
1715
def test_to_physical_rechunked_21285() -> None:
    """Check `to_physical` on multi-chunk Array/List of struct Series."""
    # A series with multiple chunks, dtype is array or list of structs with a
    # null field (causes rechunking) and a field with a different physical and
    # logical repr (causes the full body of `to_physical_repr` to run).
    logical = pl.Struct({"f0": pl.Time, "f1": pl.Null})
    physical = pl.Struct({"f0": Int64, "f1": pl.Null})

    dtype_pairs = [
        (pl.Array(logical, shape=(1,)), pl.Array(physical, shape=(1,))),
        (pl.List(logical), pl.List(physical)),
    ]
    for dtype, physical_dtype in dtype_pairs:
        s = pl.Series("a", [None], dtype)  # content doesn't matter
        s = s.append(s)
        expected = pl.Series("a", [None, None], physical_dtype)
        assert_series_equal(s.to_physical(), expected)
1732
1733
1734
def test_is_between_datetime() -> None:
    """Check `is_between` on datetimes through the expression API."""
    s = pl.Series("a", [datetime(2020, 1, 1, hour, 0, 0) for hour in (10, 20)])
    start = datetime(2020, 1, 1, 12, 0, 0)
    end = datetime(2020, 1, 1, 23, 0, 0)

    # only on the expression api
    in_range = pl.col("*").is_between(start, end)
    result = s.to_frame().with_columns(in_range).to_series()

    assert_series_equal(result, pl.Series("a", [False, True]))
1743
1744
1745
@pytest.mark.parametrize(
    "f",
    [
        "sin",
        "cos",
        "tan",
        "arcsin",
        "arccos",
        "arctan",
        "sinh",
        "cosh",
        "tanh",
        "arcsinh",
        "arccosh",
        "arctanh",
    ],
)
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
def test_trigonometric(f: str) -> None:
    """Check each trigonometric method against the numpy function of that name."""
    s = pl.Series("a", [0.0, math.pi, None, math.nan])

    # numpy supplies the reference values; positions that are null in the
    # input are set back to null in the expectation
    reference = pl.Series("a", getattr(np, f)(s.to_numpy())).to_frame()
    keep_nulls = pl.when(s.is_null()).then(None).otherwise(pl.col("a")).alias("a")
    expected = reference.with_columns(keep_nulls).to_series()

    assert_series_equal(getattr(s, f)(), expected)
1773
1774
1775
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
def test_trigonometric_cot() -> None:
    """Check `cot` against hard-coded expected values."""
    # cotangent is not available in numpy...
    s = pl.Series("a", [0.0, math.pi, None, math.nan])
    assert_series_equal(
        s.cot(), pl.Series("a", [math.inf, -8.1656e15, None, math.nan])
    )
1781
1782
1783
def test_trigonometric_invalid_input() -> None:
    """Check that trigonometric methods reject String and Date Series."""
    # String
    strings = pl.Series("a", ["1", "2", "3"])
    with pytest.raises(InvalidOperationError):
        strings.sin()

    # Date
    dates = pl.Series("a", [date(1990, 2, 28), date(2022, 7, 26)])
    with pytest.raises(InvalidOperationError):
        dates.cosh()
1793
1794
1795
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_product_ints(dtype: PolarsDataType) -> None:
    """Check `product` for integer dtypes; the expected values skip nulls."""
    for values, want in (
        ([1, 2, 3], 6),
        ([1, 2, None], 2),
        ([None, 2, 3], 6),
    ):
        assert pl.Series("a", values, dtype=dtype).product() == want
1806
1807
1808
@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
def test_product_floats(dtype: PolarsDataType) -> None:
    """Check `product` for float dtypes on empty, all-null and NaN input."""
    # empty and all-null input both give 1
    for values in ([], [None, None]):
        assert pl.Series("a", values, dtype=dtype).product() == 1

    with_nan = pl.Series("a", [3.0, None, float("nan")], dtype=dtype)
    assert math.isnan(with_nan.product())
1819
1820
1821
def test_ceil() -> None:
    """Check that `ceil` rounds each float up to the next whole number."""
    rounded_up = pl.Series([1.8, 1.2, 3.0]).ceil()
    assert_series_equal(rounded_up, pl.Series([2.0, 2.0, 3.0]))
1825
1826
1827
def test_duration_arithmetic() -> None:
    """Check adding a duration to a datetime Series from either side."""
    # apply some basic duration math to series
    s = pl.Series([datetime(2022, 1, 1, 10, 20, 30), datetime(2022, 1, 2, 20, 40, 50)])
    offsets = (
        pl.duration(days=5, microseconds=123456),
        timedelta(days=5, microseconds=123456),
    )
    expected_values = [
        datetime(2022, 1, 6, 10, 20, 30, 123456),
        datetime(2022, 1, 7, 20, 40, 50, 123456),
    ]

    for d in offsets:
        left = pl.select((s + d).alias("d_offset"))["d_offset"]
        right = pl.select((d + s).alias("d_offset"))["d_offset"]
        assert left.to_list() == expected_values
        assert_series_equal(left, right)
1842
1843
1844
def test_mean_overflow() -> None:
    """Check that the mean of many Int16 values does not overflow."""
    # 2**17 copies of 255 sum to 33_423_360, far outside the Int16 range, so a
    # sum accumulated at the input's own width would wrap around.
    arr = np.array([255] * (1 << 17), dtype="int16")

    # numpy reference value
    assert arr.mean() == 255.0
    # the Series built from the same data must agree; previously only the
    # numpy array was checked, so polars itself was never exercised
    assert pl.Series(arr).mean() == 255.0
1847
1848
1849
def test_sign() -> None:
    """Check `sign` on ints and floats, and that dates are rejected."""
    nan = float("nan")

    # Integers
    ints = pl.Series("a", [-9, -0, 0, 4, None])
    assert_series_equal(ints.sign(), pl.Series("a", [-1, 0, 0, 1, None]))

    # Floats
    floats = pl.Series("a", [-9.0, -0.0, 0.0, 4.0, nan, None])
    assert_series_equal(
        floats.sign(), pl.Series("a", [-1.0, 0.0, 0.0, 1.0, nan, None])
    )

    # Invalid input
    dates = pl.Series(
        "a", [date(1950, 2, 1), date(1970, 1, 1), date(2022, 12, 12), None]
    )
    with pytest.raises(InvalidOperationError):
        dates.sign()
1864
1865
1866
def test_exp() -> None:
    """Check `exp` on floats with a null, and on an empty slice."""
    s = pl.Series("a", [0.1, 0.01, None])
    assert_series_equal(
        s.exp(), pl.Series("a", [1.1051709180756477, 1.010050167084168, None])
    )

    # test if we can run on empty series as well.
    empty = s[:0]
    assert empty.exp().to_list() == []
1872
1873
1874
def test_cumulative_eval() -> None:
    """Check `cumulative_eval` with single and combined element expressions."""
    s = pl.Series("values", [1, 2, 3, 4, 5])

    first = pl.element().first()
    last_squared = pl.element().last() ** 2

    cases = [
        # evaluate expressions individually
        (first, [1, 1, 1, 1, 1]),
        (last_squared, [1, 4, 9, 16, 25]),
        # evaluate combined expressions and validate
        (first - last_squared, [0, -3, -8, -15, -24]),
    ]
    for expr, values in cases:
        assert_series_equal(s.cumulative_eval(expr), pl.Series("values", values))
1890
1891
1892
def test_clip() -> None:
    """Check that `clip(1, 10)` bounds values and leaves nulls untouched."""
    clipped = pl.Series("foo", [-50, 5, None, 50]).clip(1, 10)
    assert clipped.to_list() == [1, 5, None, 10]
1895
1896
1897
def test_repr() -> None:
    """Check the `repr` of a Series and of a Series subclass."""
    values = [1001, 2002, 3003]

    s = pl.Series("ints", values)
    s_repr = repr(s)

    assert "shape: (3,)" in s_repr
    assert "Series: 'ints' [i64]" in s_repr
    for n in s.to_list():
        assert str(n) in s_repr

    class XSeries(pl.Series):
        """Custom Series class."""

    # check custom class name reflected in repr output
    x = XSeries("ints", values)
    x_repr = repr(x)

    assert "shape: (3,)" in x_repr
    assert "XSeries: 'ints' [i64]" in x_repr
    assert "1001" in x_repr
    for n in x.to_list():
        assert str(n) in x_repr
1918
1919
1920
def test_repr_html(df: pl.DataFrame) -> None:
    """Smoke-test `Series._repr_html_`: it must not raise and must emit a table."""
    # NOTE: the `df` fixture is requested by the signature but not used below.
    rendered = pl.Series("misc", [123, 456, 789])._repr_html_()
    assert "<table" in rendered
1924
1925
1926
@pytest.mark.parametrize(
    ("value", "time_unit", "exp", "exp_type"),
    [
        (13285, "d", date(2006, 5, 17), pl.Date),
        (1147880044, "s", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime),
        (1147880044 * 1_000, "ms", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime("ms")),
        (
            1147880044 * 1_000_000,
            "us",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("us"),
        ),
        (
            1147880044 * 1_000_000_000,
            "ns",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("ns"),
        ),
    ],
)
def test_from_epoch_expr(
    value: int,
    time_unit: EpochTimeUnit,
    exp: date | datetime,
    exp_type: PolarsDataType,
) -> None:
    """Check `pl.from_epoch` on a Series for each parametrized time unit.

    The input holds the epoch `value` and a null; the result must equal the
    expected temporal value (and a null) cast to `exp_type`.
    """
    epochs = pl.Series("timestamp", [value, None])
    converted = pl.from_epoch(epochs, time_unit=time_unit)

    wanted = pl.Series("timestamp", [exp, None]).cast(exp_type)
    assert_series_equal(converted, wanted)
1957
1958
1959
def test_get_chunks() -> None:
    """Check that concatenating without rechunking keeps both inputs as chunks."""
    first = pl.Series("a", [1, 2])
    second = pl.Series("a", [3, 4])

    combined = pl.concat([first, second], rechunk=False)
    chunks = combined.get_chunks()

    assert_series_equal(chunks[0], first)
    assert_series_equal(chunks[1], second)
1965
1966
1967
def test_null_comparisons() -> None:
    """Check that `==` and `!=` against the shifted series each yield two nulls."""
    s = pl.Series("s", [None, "str", "a"])
    shifted = s.shift()

    equal_mask = shifted == s
    not_equal_mask = shifted != s

    assert equal_mask.null_count() == 2
    assert not_equal_mask.null_count() == 2
1971
1972
1973
def test_min_max_agg_on_str() -> None:
    """Check `min` and `max` on a string Series."""
    s = pl.Series(["b", "a", "x"])
    smallest, largest = s.min(), s.max()
    assert (smallest, largest) == ("a", "x")
1977
1978
1979
def test_min_max_full_nan_15058() -> None:
    """Check that `min` and `max` of an all-NaN Series are both NaN."""
    s = pl.Series([float("nan"), float("nan")])
    extremes = [s.min(), s.max()]
    # NaN is detected through self-inequality: `value != value`
    assert all(value != value for value in extremes)
1982
1983
1984
def test_is_between() -> None:
    """Check `Series.is_between` for several `closed` modes, expressions and strings."""
    nums = pl.Series("num", [1, 2, None, 4, 5])

    # default `closed`
    assert nums.is_between(2, 4).to_list() == [False, True, None, True, False]

    left_closed = nums.is_between(2, 4, closed="left")
    assert left_closed.to_list() == [False, True, None, False, False]

    right_closed = nums.is_between(2, 4, closed="right")
    assert right_closed.to_list() == [False, False, None, True, False]

    # bounds given as expressions
    expr_bounds = nums.is_between(pl.lit(2) / 2, pl.lit(4) * 2, closed="both")
    assert expr_bounds.to_list() == [True, True, None, True, True]

    # string bounds
    letters = pl.Series("s", ["a", "b", "c", "d", "e"])
    assert letters.is_between("b", "d").to_list() == [False, True, True, True, False]
2023
2024
2025
@pytest.mark.parametrize(
    ("dtype", "lower", "upper"),
    [
        (pl.Int8, -128, 127),
        (pl.UInt8, 0, 255),
        (pl.Int16, -32768, 32767),
        (pl.UInt16, 0, 65535),
        (pl.Int32, -2147483648, 2147483647),
        (pl.UInt32, 0, 4294967295),
        (pl.Int64, -9223372036854775808, 9223372036854775807),
        (pl.UInt64, 0, 18446744073709551615),
        (pl.Float32, float("-inf"), float("inf")),
        (pl.Float64, float("-inf"), float("inf")),
    ],
)
def test_upper_lower_bounds(
    dtype: PolarsDataType, upper: int | float, lower: int | float
) -> None:
    """Check `lower_bound` / `upper_bound` of an empty Series for each dtype."""
    # NOTE: the signature lists `upper` before `lower` while the parametrize
    # tuple is (dtype, lower, upper); pytest binds the arguments by name.
    empty = pl.Series("s", dtype=dtype)

    observed_lower = empty.lower_bound().item()
    assert observed_lower == lower

    observed_upper = empty.upper_bound().item()
    assert observed_upper == upper
2046
2047
2048
def test_numpy_series_arithmetic() -> None:
    """Check arithmetic between a Series and a NumPy array in both operand orders."""
    series = pl.Series(values=[1, 2])
    array = np.array([3.0, 4.0])

    # addition
    expected_sum = pl.Series([4.0, 6.0], dtype=pl.Float64)
    array_plus_series = array + series
    series_plus_array = series + array
    assert_series_equal(array_plus_series, expected_sum)  # type: ignore[arg-type]
    assert_series_equal(series_plus_array, expected_sum)

    # subtraction
    array_minus_series = cast(
        "pl.Series", array - series
    )  # py37 is different vs py311 on this one
    assert_series_equal(array_minus_series, pl.Series([2.0, 2.0], dtype=pl.Float64))
    series_minus_array = series - array
    assert_series_equal(series_minus_array, pl.Series([-2.0, -2.0], dtype=pl.Float64))

    # multiplication
    expected_product = pl.Series([3.0, 8.0], dtype=pl.Float64)
    array_times_series = array * series
    series_times_array = series * array
    assert_series_equal(array_times_series, expected_product)  # type: ignore[arg-type]
    assert_series_equal(series_times_array, expected_product)

    # division
    array_over_series = array / series
    expected_quotient = pl.Series([3.0, 2.0], dtype=pl.Float64)
    assert_series_equal(array_over_series, expected_quotient)  # type: ignore[arg-type]
    series_over_array = series / array
    assert_series_equal(series_over_array, pl.Series([1 / 3, 0.5], dtype=pl.Float64))

    # exponentiation
    array_pow_series = array**series
    expected_power = pl.Series([3.0, 16.0], dtype=pl.Float64)
    assert_series_equal(array_pow_series, expected_power)  # type: ignore[arg-type]
    series_pow_array = series**array
    expected_power = pl.Series([1.0, 16.0], dtype=pl.Float64)
    assert_series_equal(series_pow_array, expected_power)  # type: ignore[arg-type]
2084
2085
2086
def test_from_epoch_seq_input() -> None:
    """Check that `pl.from_epoch` accepts a plain Python list as input."""
    epochs = [1147880044]
    converted = pl.from_epoch(epochs)
    assert_series_equal(converted, pl.Series([datetime(2006, 5, 17, 15, 34, 4)]))
2091
2092
2093
def test_symmetry_for_max_in_names() -> None:
    """Check that `a - a.max()` and `a.max() - a` both keep the name of `a`."""
    candidates = [
        pl.Series("a", [1]),  # int
        pl.Series("a", [1.0]),  # float
        pl.Series("a", [1], dtype=pl.Duration("ns")),  # duration
        pl.Series("a", [1], dtype=pl.Datetime("ns")),  # datetime
    ]
    for a in candidates:
        assert (a - a.max()).name == (a.max() - a).name == a.name  # type: ignore[union-attr]

    # TODO: time arithmetic support?
    # a = pl.Series("a", [1], dtype=pl.Time)
    # assert (a - a.max()).name == (a.max() - a).name == a.name
2110
2111
2112
def test_series_getitem_out_of_bounds_positive() -> None:
    """Check that a too-large positive index raises IndexError with a clear message."""
    s = pl.Series([1, 2])
    expected_msg = "index 10 is out of bounds for sequence of length 2"
    with pytest.raises(IndexError, match=expected_msg):
        s[10]
2118
2119
2120
def test_series_getitem_out_of_bounds_negative() -> None:
    """Check that a too-small negative index raises IndexError with a clear message."""
    s = pl.Series([1, 2])
    expected_msg = "index -10 is out of bounds for sequence of length 2"
    with pytest.raises(IndexError, match=expected_msg):
        s[-10]
2126
2127
2128
def test_series_cmp_fast_paths() -> None:
    """Check `!=` / `==` between a length-1 null Series and a length-2 Series.

    For Int32, String and Boolean the result must be `[None, None]`.
    """
    all_null = [None, None]

    null_int = pl.Series([None], dtype=pl.Int32)
    ints = pl.Series([1, 2], dtype=pl.Int32)
    assert (null_int != ints).to_list() == all_null
    assert (null_int == ints).to_list() == all_null

    null_str = pl.Series([None], dtype=pl.String)
    strings = pl.Series(["a", "b"], dtype=pl.String)
    assert (null_str != strings).to_list() == all_null
    assert (null_str == strings).to_list() == all_null

    null_bool = pl.Series([None], dtype=pl.Boolean)
    mixed_bools = pl.Series([True, False], dtype=pl.Boolean)
    false_bools = pl.Series([False, False], dtype=pl.Boolean)
    assert (null_bool != mixed_bools).to_list() == all_null
    assert (null_bool == false_bools).to_list() == all_null
2151
2152
2153
def test_comp_series_with_str_13123() -> None:
    """Check string-literal comparisons against a Series that contains a null.

    `!=` / `==` keep the null in the result, while `eq_missing` /
    `ne_missing` produce a boolean for the null entry as well.
    """
    digits = pl.Series(["1", "2", None])

    assert_series_equal(digits != "1", pl.Series([False, True, None]))
    assert_series_equal(digits == "1", pl.Series([True, False, None]))

    eq_result = digits.eq_missing("1")
    ne_result = digits.ne_missing("1")
    assert_series_equal(eq_result, pl.Series([True, False, False]))
    assert_series_equal(ne_result, pl.Series([False, True, True]))
2159
2160
2161
@pytest.mark.parametrize(
    ("data", "single", "multiple", "single_expected", "multiple_expected"),
    [
        ([1, 2, 3], 1, [2, 4], 0, [1, 3]),
        (["a", "b", "c"], "d", ["a", "d"], 3, [0, 3]),
        ([b"a", b"b", b"c"], b"d", [b"a", b"d"], 3, [0, 3]),
        (
            [date(2022, 1, 2), date(2023, 4, 1)],
            date(2022, 1, 1),
            [date(1999, 10, 1), date(2024, 1, 1)],
            0,
            [0, 2],
        ),
        ([1, 2, 3], 1, np.array([2, 4]), 0, [1, 3]),  # test np array.
    ],
)
def test_search_sorted(
    data: list[Any],
    single: Any,
    multiple: list[Any],
    single_expected: Any,
    multiple_expected: list[Any],
) -> None:
    """Check `Series.search_sorted` for a scalar and for a collection of values.

    The scalar lookup is compared directly with `single_expected`; the
    multi-value lookup must equal a UInt32 Series of `multiple_expected`.
    """
    haystack = pl.Series(data)

    scalar_hit = haystack.search_sorted(single)
    assert scalar_hit == single_expected

    many_hits = haystack.search_sorted(multiple)
    wanted = pl.Series(multiple_expected, dtype=pl.UInt32)
    assert_series_equal(many_hits, wanted)
2190
2191
2192
def test_series_from_pandas_with_dtype() -> None:
    """Check building a Series from pandas input with an explicit target dtype.

    In-range values convert to Int8; a negative value targeted at UInt8
    raises by default and becomes null when `strict=False`.
    """
    expected = pl.Series("foo", [1, 2, 3], dtype=pl.Int8)
    in_range_inputs = (pd.Series([1, 2, 3]), pd.Series([1, 2, 3], dtype="Int16"))
    for pd_values in in_range_inputs:
        assert_series_equal(pl.Series("foo", pd_values, pl.Int8), expected)

    negative_inputs = (pd.Series([-1, 2, 3]), pd.Series([-1, 2, 3], dtype="Int8"))
    for pd_values in negative_inputs:
        with pytest.raises(InvalidOperationError, match="conversion from"):
            pl.Series("foo", pd_values, pl.UInt8)

        lenient = pl.Series("foo", pd_values, pl.UInt8, strict=False)
        assert lenient.to_list() == [None, 2, 3]
        assert lenient.dtype == pl.UInt8
2210
2211
2212
def test_series_from_pyarrow_with_dtype() -> None:
    """Check building a Series from a pyarrow array with an explicit target dtype."""
    arrow_values = pa.array([-1, 2, 3])

    as_int8 = pl.Series("foo", arrow_values, pl.Int8)
    assert_series_equal(as_int8, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))

    # the negative value cannot be represented as UInt8
    with pytest.raises(InvalidOperationError, match="conversion from"):
        pl.Series("foo", arrow_values, pl.UInt8)

    lenient = pl.Series("foo", arrow_values, dtype=pl.UInt8, strict=False)
    assert lenient.to_list() == [None, 2, 3]
    assert lenient.dtype == pl.UInt8
2222
2223
2224
def test_series_from_numpy_with_dtype() -> None:
    """Check building a Series from a NumPy array with an explicit target dtype."""
    np_values = np.array([-1, 2, 3])

    as_int8 = pl.Series("foo", np_values, pl.Int8)
    assert_series_equal(as_int8, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))

    # the negative value cannot be represented as UInt8
    with pytest.raises(InvalidOperationError, match="conversion from"):
        pl.Series("foo", np_values, pl.UInt8)

    lenient = pl.Series("foo", np_values, dtype=pl.UInt8, strict=False)
    assert lenient.to_list() == [None, 2, 3]
    assert lenient.dtype == pl.UInt8
2234
2235
2236
def test_raise_invalid_is_between() -> None:
    """Check that `is_between` on an integer literal with string bounds raises."""
    between_strings = pl.lit(2).is_between(pl.lit("11"), pl.lit("33"))
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.select(between_strings)
2239
2240
2241
def test_construction_large_nested_u64_17231() -> None:
    """Check that a nested UInt64 value above the Int64 maximum round-trips.

    The value 9223372036854775808 is one more than the Int64 upper bound
    (9223372036854775807), wrapped in a list inside a struct.
    """
    # The module-level `pl` import is used; no function-local re-import needed.
    values = [{"f0": [9223372036854775808]}]
    dtype = pl.Struct({"f0": pl.List(pl.UInt64)})
    assert pl.Series(values, dtype=dtype).to_list() == values
2247
2248
2249
def test_repeat_by() -> None:
    """Check that `repeat_by(2)` turns each value into a two-element list."""
    repeated = pl.Series("a", [1, 2]).repeat_by(2)
    calculated = pl.select(a=repeated)

    expected = pl.select(a=pl.Series("a", [[1, 1], [2, 2]]))
    assert calculated.equals(expected)
2253
2254
2255
def test_is_close() -> None:
    """Check `Series.is_close` with `abs_tol=0.5` on finite, infinite and NaN pairs."""
    inf = float("inf")
    nan = float("nan")

    # one row per element: (value in `a`, value in `b`, expected result)
    cases: list[tuple[float, float, bool]] = [
        (1.0, 1.3, True),
        (1.0, 1.7, False),
        (-inf, -inf, True),
        (inf, inf, True),
        (inf, -inf, False),
        (inf, 1.0, False),
        (nan, nan, False),
    ]
    a = pl.Series("a", [lhs for lhs, _, _ in cases])
    b = pl.Series("b", [rhs for _, rhs, _ in cases])
    expected = [outcome for _, _, outcome in cases]

    assert a.is_close(b, abs_tol=0.5).to_list() == expected
2280
2281
2282
def test_is_close_literal() -> None:
    """Check `Series.is_close` against a scalar with default tolerances."""
    a = pl.Series("a", [1.1, 1.2, 1.3, 1.4, float("inf"), float("nan")])
    close_to_literal = a.is_close(1.2)
    assert close_to_literal.to_list() == [False, True, False, False, False, False]
2285
2286
2287
def test_is_close_nans_equal() -> None:
    """Check that `nans_equal=True` makes a NaN/NaN pair compare as close."""
    nan = float("nan")
    a = pl.Series("a", [1.0, nan])
    b = pl.Series("b", [2.0, nan])

    outcome = a.is_close(b, nans_equal=True)
    assert outcome.to_list() == [False, True]
2291
2292
2293
def test_is_close_invalid_abs_tol() -> None:
    """Check that a negative `abs_tol` raises a ComputeError on evaluation."""
    invalid = pl.lit(1.0).is_close(1, abs_tol=-1.0)
    with pytest.raises(pl.exceptions.ComputeError):
        pl.select(invalid)
2296
2297
2298
def test_is_close_invalid_rel_tol() -> None:
    """Check that a negative `rel_tol` raises a ComputeError on evaluation."""
    invalid = pl.lit(1.0).is_close(1, rel_tol=-1.0)
    with pytest.raises(pl.exceptions.ComputeError):
        pl.select(invalid)
2301
2302
2303
def test_comparisons_structs_raise() -> None:
    """Check that `==` on a Struct Series raises NotImplementedError for each rhs."""
    s = pl.Series([{"x": 1}, {"x": 2}, {"x": 3}])
    msg = r"Series of type Struct\(\{'x': Int64\}\) does not have eq operator"

    for rhs in ("", " ", 5, {"x": 1}):
        with pytest.raises(NotImplementedError, match=msg):
            s == rhs  # noqa: B015
2312
2313