Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/series/test_series.py
8440 views
1
from __future__ import annotations
2
3
import math
4
from datetime import date, datetime, time, timedelta
5
from decimal import Decimal
6
from typing import TYPE_CHECKING, Any, cast
7
from zoneinfo import ZoneInfo
8
9
import numpy as np
10
import pandas as pd
11
import pyarrow as pa
12
import pytest
13
14
import polars as pl
15
from polars._utils.construction import iterable_to_pyseries
16
from polars.datatypes import (
17
Datetime,
18
Field,
19
Float64,
20
Int32,
21
Int64,
22
Time,
23
UInt32,
24
UInt64,
25
Unknown,
26
)
27
from polars.exceptions import (
28
DuplicateError,
29
InvalidOperationError,
30
PolarsInefficientMapWarning,
31
ShapeError,
32
)
33
from polars.testing import assert_frame_equal, assert_series_equal
34
from tests.unit.conftest import FLOAT_DTYPES, INTEGER_DTYPES
35
from tests.unit.utils.pycapsule_utils import PyCapsuleStreamHolder
36
37
if TYPE_CHECKING:
38
from collections.abc import Iterator
39
40
from polars._typing import EpochTimeUnit, PolarsDataType, TimeUnit
41
from tests.conftest import PlMonkeyPatch
42
43
44
def test_cum_agg() -> None:
    """Check each cumulative aggregation against a hand-computed result."""
    values = pl.Series("a", [1, 2, 3, 2])
    expected_by_method = {
        "cum_sum": [1, 3, 6, 8],
        "cum_min": [1, 1, 1, 1],
        "cum_max": [1, 2, 3, 3],
        "cum_prod": [1, 2, 6, 12],
    }
    for method, expected in expected_by_method.items():
        assert_series_equal(getattr(values, method)(), pl.Series("a", expected))
51
52
53
def test_cum_agg_with_nulls() -> None:
    """Check the cumulative aggregations on a series that contains nulls."""
    values = pl.Series("a", [None, 2, None, 7, 8, None])
    expected_by_method = {
        "cum_sum": [None, 2, None, 9, 17, None],
        "cum_min": [None, 2, None, 2, 2, None],
        "cum_max": [None, 2, None, 7, 8, None],
        "cum_prod": [None, 2, None, 14, 112, None],
    }
    for method, expected in expected_by_method.items():
        assert_series_equal(getattr(values, method)(), pl.Series("a", expected))
60
61
62
def test_cum_agg_with_infs() -> None:
    """Check that infinite values are handled by `cum_min` / `cum_max`."""
    pos_inf = float("inf")
    neg_inf = float("-inf")

    starts_with_pos_inf = pl.Series([pos_inf, 0.0, 1.0])
    assert_series_equal(
        starts_with_pos_inf.cum_min(), pl.Series([pos_inf, 0.0, 0.0])
    )

    starts_with_neg_inf = pl.Series([neg_inf, 0.0, 1.0])
    assert_series_equal(
        starts_with_neg_inf.cum_max(), pl.Series([neg_inf, 0.0, 1.0])
    )
69
70
71
def test_cum_min_max_bool() -> None:
    """Boolean cum_min/cum_max must agree with the same ops on Int32."""
    bools = pl.Series(
        "a", [None, True, True, None, False, None, True, False, False, None]
    )
    as_ints = bools.cast(pl.Int32)

    # forward direction
    assert_series_equal(bools.cum_min().cast(pl.Int32), as_ints.cum_min())
    assert_series_equal(bools.cum_max().cast(pl.Int32), as_ints.cum_max())

    # reversed direction
    assert_series_equal(
        bools.cum_min(reverse=True).cast(pl.Int32), as_ints.cum_min(reverse=True)
    )
    assert_series_equal(
        bools.cum_max(reverse=True).cast(pl.Int32), as_ints.cum_max(reverse=True)
    )
81
82
83
def test_init_inputs(plmonkeypatch: PlMonkeyPatch) -> None:
    """Exercise `pl.Series` construction with accepted and rejected inputs."""
    nan = float("nan")

    # Accepted positional/keyword combinations of `name` and `values`
    pl.Series("a", [1, 2])
    pl.Series("a", values=[1, 2])
    pl.Series(name="a", values=[1, 2])
    pl.Series(values=[1, 2], name="a")

    assert pl.Series([1, 2]).dtype == pl.Int64
    assert pl.Series(values=[1, 2]).dtype == pl.Int64
    # Null dtype is expected when there is no data at all
    assert pl.Series("a").dtype == pl.Null
    assert pl.Series().dtype == pl.Null
    assert pl.Series([]).dtype == pl.Null
    # ... and also for a list that holds only None
    only_nones = pl.Series([None, None, None])
    assert only_nones.dtype == pl.Null
    assert pl.Series(values=[True, False]).dtype == pl.Boolean
    assert pl.Series(values=np.array([True, False])).dtype == pl.Boolean
    assert pl.Series(values=np.array(["foo", "bar"])).dtype == pl.String
    assert pl.Series(values=["foo", "bar"]).dtype == pl.String
    nested = pl.Series("a", [pl.Series([1, 2, 4]), pl.Series([3, 2, 1])])
    assert nested.dtype == pl.List
    assert pl.Series("a", [10000, 20000, 30000], dtype=pl.Time).dtype == pl.Time

    # 2d numpy array and/or list/tuple of 1d numpy arrays
    f32 = np.float32
    two_dim_inputs = (
        np.array([[1, 2], [3, nan]], dtype=f32),
        [
            np.array([1, 2], dtype=f32),
            np.array([3, nan], dtype=f32),
        ],
        (
            np.ndarray((2,), f32, np.array([1, 2], dtype=f32)),
            np.ndarray((2,), f32, np.array([3, nan], dtype=f32)),
        ),
    )
    for values in two_dim_inputs:
        arr_series = pl.Series(name="a", values=values, nan_to_null=True)
        assert arr_series.dtype == pl.Array(pl.Float32, shape=2)
        assert arr_series[0].to_list() == [1.0, 2.0]
        assert arr_series[1].to_list() == [3.0, None]

    # numpy arrays from arange, with and without an explicit dtype
    two_ints = np.arange(2, dtype=np.int64)
    three_ints = np.arange(3, dtype=np.int64)
    ragged_candidates = (
        pl.Series("a", [two_ints, three_ints]),
        pl.Series("a", [two_ints, three_ints], dtype=pl.List(pl.Int64)),
    )
    for list_series in ragged_candidates:
        assert list_series.dtype == pl.List(pl.Int64)
        assert list_series.to_list() == [[0, 1], [0, 1, 2]]

    string_grid = np.array([["foo", "bar"], ["foo2", "bar2"]])
    assert pl.Series(values=string_grid).dtype == pl.Array(pl.String, shape=2)

    # nested Python lists
    assert pl.Series("a", [[1, 2], [3, 4]]).dtype == pl.List(pl.Int64)

    # Date values requested as Datetime
    s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime)
    assert s.to_list() == [datetime(2023, 1, 1), datetime(2023, 1, 2)]
    assert Datetime == s.dtype
    assert s.dtype.time_unit == "us"  # type: ignore[attr-defined]
    assert s.dtype.time_zone is None  # type: ignore[attr-defined]

    # Date values requested as Datetime with an explicit unit and time zone
    tu: TimeUnit = "ms"
    tz = "America/Argentina/Rio_Gallegos"
    naive = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu))
    s = naive.dt.replace_time_zone(tz)
    zone = ZoneInfo(tz)
    d1 = datetime(2023, 1, 1, 0, 0, 0, 0, zone)
    d2 = datetime(2023, 1, 2, 0, 0, 0, 0, zone)
    assert s.to_list() == [d1, d2]
    assert Datetime == s.dtype
    assert s.dtype.time_unit == tu  # type: ignore[attr-defined]
    assert s.dtype.time_zone == tz  # type: ignore[attr-defined]

    # datetime64: check time unit (auto-detect, implicit/explicit) and NaT
    d64 = pd.date_range(date(2021, 8, 1), date(2021, 8, 3)).values.astype(
        "datetime64[ns]"
    )
    d64[1] = None

    expected = [datetime(2021, 8, 1, 0), None, datetime(2021, 8, 3, 0)]
    for dtype in (None, Datetime, Datetime("ns")):
        s = pl.Series("dates", d64, dtype)
        assert s.to_list() == expected
        assert Datetime == s.dtype
        assert s.dtype.time_unit == "ns"  # type: ignore[attr-defined]

    s = pl.Series(values=d64.astype("<M8[ms]"))
    assert s.dtype.time_unit == "ms"  # type: ignore[attr-defined]
    assert expected == s.to_list()

    # pandas input
    assert pl.Series(pd.Series([1, 2])).dtype == pl.Int64

    # Rejected inputs
    with pytest.raises(TypeError):
        pl.Series([1, 2, 3], [1, 2, 3])
    with pytest.raises(TypeError):
        pl.Series({"a": [1, 2, 3]})
    with pytest.raises(OverflowError):
        pl.Series("bigint", [2**128])

    # Pretend numpy is unavailable by patching the numpy check to return False
    plmonkeypatch.setattr(pl.series.series, "_check_for_numpy", lambda x: False)
    with pytest.raises(TypeError):
        pl.DataFrame(np.array([1, 2, 3]), schema=["a"])
203
204
205
def test_init_structured_objects() -> None:
    """Build a Series from dataclass, NamedTuple and pydantic model objects."""
    from typing import NamedTuple

    from polars._dependencies import dataclasses, pydantic

    @dataclasses.dataclass
    class TeaShipmentDC:
        exporter: str
        importer: str
        product: str
        tonnes: int | None

    class TeaShipmentNT(NamedTuple):
        exporter: str
        importer: str
        product: str
        tonnes: None | int

    class TeaShipmentPD(pydantic.BaseModel):
        exporter: str
        importer: str
        product: str
        tonnes: int

    # The same three shipments are expected whichever container type is used.
    expected_fields = [
        Field("exporter", pl.String),
        Field("importer", pl.String),
        Field("product", pl.String),
        Field("tonnes", pl.Int64),
    ]
    expected_rows = [
        {
            "exporter": "Sri Lanka",
            "importer": "USA",
            "product": "Ceylon",
            "tonnes": 10,
        },
        {
            "exporter": "India",
            "importer": "UK",
            "product": "Darjeeling",
            "tonnes": 25,
        },
        {
            "exporter": "China",
            "importer": "UK",
            "product": "Keemum",
            "tonnes": 40,
        },
    ]

    for shipment_cls in (TeaShipmentDC, TeaShipmentNT, TeaShipmentPD):
        t0 = shipment_cls(
            exporter="Sri Lanka", importer="USA", product="Ceylon", tonnes=10
        )
        t1 = shipment_cls(
            exporter="India", importer="UK", product="Darjeeling", tonnes=25
        )
        t2 = shipment_cls(
            exporter="China", importer="UK", product="Keemum", tonnes=40
        )
        shipments = [t0, t1, t2]

        s = pl.Series("t", shipments)

        assert isinstance(s, pl.Series)
        assert s.dtype.fields == expected_fields  # type: ignore[attr-defined]
        assert s.to_list() == expected_rows
        assert_frame_equal(s.to_frame(), pl.DataFrame({"t": [t0, t1, t2]}))
265
266
267
def test_to_frame() -> None:
    """`to_frame` keeps the series name unless an explicit name is given."""
    unnamed = pl.Series([1, 2])
    named = pl.Series("s", [1, 2])

    frames_and_names = (
        (unnamed.to_frame(), ""),
        (named.to_frame(), "s"),
        (unnamed.to_frame("xyz"), "xyz"),
        (named.to_frame("xyz"), "xyz"),
    )
    for frame, expected_name in frames_and_names:
        assert isinstance(frame, pl.DataFrame)
        assert frame.rows() == [(1,), (2,)]
        assert frame.columns == [expected_name]

    # note: the empty string IS technically a valid column name
    assert named.to_frame("").columns == [""]
    # the source series keeps its own name afterwards
    assert named.name == "s"
284
285
286
def test_bitwise_ops() -> None:
    """Check and/or/xor/invert between boolean series and with a scalar."""
    lhs = pl.Series([True, False, True])
    rhs = pl.Series([False, True, True])

    assert_series_equal(lhs & rhs, pl.Series([False, False, True]))
    assert_series_equal(lhs | rhs, pl.Series([True, True, True]))
    assert_series_equal(lhs ^ rhs, pl.Series([True, True, False]))
    assert_series_equal(~lhs, pl.Series([False, True, False]))

    # The reflected operators (rand/ror/rxor) are triggered here by putting a
    # plain Python bool on the left-hand side. The type annotations only allow
    # a Series to be passed in, but there is specific code to deal with
    # non-Series inputs.
    reflected_and = True & lhs
    assert_series_equal(reflected_and, pl.Series([True, False, True]))

    reflected_or = True | lhs
    assert_series_equal(reflected_or, pl.Series([True, True, True]))

    reflected_xor = True ^ lhs
    assert_series_equal(reflected_xor, pl.Series([False, True, False]))
309
310
311
def test_bitwise_floats_invert() -> None:
    """Inverting a float series must raise InvalidOperationError."""
    floats = pl.Series([2.0, 3.0, 0.0])

    with pytest.raises(InvalidOperationError):
        ~floats
316
317
318
def test_equality() -> None:
    """Check comparison operators against a series, a string and a list."""
    left = pl.Series("a", [1, 2])
    right = left  # same object, so every element compares equal

    eq_mask = left == right
    assert isinstance(eq_mask, pl.Series)
    assert eq_mask.sum() == 2
    assert (left != right).sum() == 0
    assert (left >= right).sum() == 2
    assert (left <= right).sum() == 2
    assert (left > right).sum() == 0
    assert (left < right).sum() == 0
    assert left.sum() == 3
    assert_series_equal(left, right)

    # comparison against a scalar string
    names = pl.Series("name", ["ham", "foo", "bar"])
    assert_series_equal(names == "ham", pl.Series("name", [True, False, False]))

    # comparison of a list series against a Python list
    lists = pl.Series("name", [[1], [1, 2], [2, 3]])
    assert_series_equal(lists == [1], pl.Series("name", [True, False, False]))
338
339
340
def test_agg() -> None:
    """Check mean/min/max on a small integer series."""
    ints = pl.Series("a", [1, 2])
    mean_value = ints.mean()
    assert mean_value == 1.5
    min_value = ints.min()
    assert min_value == 1
    max_value = ints.max()
    assert max_value == 2
345
346
347
def test_date_agg() -> None:
    """Check min/max on a Date series, including a far-future date."""
    earliest = date(2022, 8, 2)
    latest = date(9009, 9, 9)
    dates = pl.Series([earliest, date(2096, 8, 1), latest], dtype=pl.Date)

    assert dates.min() == earliest
    assert dates.max() == latest
358
359
360
@pytest.mark.parametrize(
    ("s", "min", "max"),
    [
        (pl.Series(["c", "b", "a"], dtype=pl.Categorical()), "a", "c"),
        (pl.Series([None, "a", "c", "b"], dtype=pl.Categorical()), "a", "c"),
        (pl.Series([], dtype=pl.Categorical()), None, None),
        (pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a"])), "c", "a"),
        (pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a", "d"])), "c", "a"),
    ],
)
def test_categorical_agg(s: pl.Series, min: str | None, max: str | None) -> None:
    """Check min/max of Categorical and Enum series for each parametrized case."""
    # `min` / `max` are the parametrized expectations, not the builtins.
    observed_min = s.min()
    observed_max = s.max()
    assert observed_min == min
    assert observed_max == max
373
374
375
def test_add_string() -> None:
    """Check string concatenation with a scalar on either side of `+`."""
    s = pl.Series(["hello", "weird"])

    # Series on the left: the scalar is appended as a suffix.
    result = s + " world"
    assert_series_equal(result, pl.Series(["hello world", "weird world"]))

    # Scalar on the left: the expected result series is named "literal".
    result = "pfx:" + s
    assert_series_equal(result, pl.Series("literal", ["pfx:hello", "pfx:weird"]))
383
384
385
@pytest.mark.parametrize(
    ("data", "expected_dtype"),
    [
        (100, pl.Int64),
        (8.5, pl.Float64),
        ("서울특별시", pl.String),
        (date.today(), pl.Date),
        (datetime.now(), pl.Datetime("us")),
        (time(23, 59, 59), pl.Time),
        (timedelta(hours=7, seconds=123), pl.Duration("us")),
    ],
)
def test_unknown_dtype(data: Any, expected_dtype: PolarsDataType) -> None:
    """Passing `Unknown` as dtype should still infer the correct dtype."""
    inferred = pl.Series([data], dtype=Unknown)
    assert inferred.dtype == expected_dtype
    # the value itself must round-trip unchanged
    assert inferred.to_list() == [data]
402
403
404
def test_various() -> None:
    """Smoke-test a mix of basic Series methods."""
    s = pl.Series("a", [1, 2])
    assert s.is_null().sum() == 0
    assert s.name == "a"

    # rename returns a series carrying the new name
    s = s.rename("b")
    assert s.name == "b"
    assert s.len() == 2
    assert len(s) == 2

    # append extends the series in place
    s.append(s.clone())
    assert_series_equal(s, pl.Series("b", [1, 2, 1, 2]))

    # head / tail
    s = pl.Series("a", range(20))
    first_five = s.head(5)
    last_five = s.tail(5)
    assert first_five.len() == 5
    assert last_five.len() == 5
    assert (first_five != last_five).all()

    # in-place sort
    s = pl.Series("a", [2, 1, 4])
    s.sort(in_place=True)
    assert_series_equal(s, pl.Series("a", [1, 2, 4]))

    # arg_unique / gather
    s = pl.Series("a", [2, 1, 1, 4, 4, 4])
    expected_idx = pl.Series("a", [0, 1, 3], dtype=pl.get_index_type())
    assert_series_equal(s.arg_unique(), expected_idx)

    assert_series_equal(s.gather([2, 3]), pl.Series("a", [1, 4]))
431
432
433
def test_series_dtype_is() -> None:
    """Check the `dtype.is_*` predicates for several dtypes."""
    # default integer inference
    ints = pl.Series("s", [1, 2, 3])
    int_dtype = ints.dtype

    assert int_dtype.is_numeric()
    assert int_dtype.is_integer()
    assert int_dtype.is_signed_integer()
    assert not int_dtype.is_unsigned_integer()
    assert (ints * 0.99).dtype.is_float()

    # unsigned integers
    uint_dtype = pl.Series("s", [1, 2, 3], dtype=pl.UInt8).dtype
    assert uint_dtype.is_numeric()
    assert uint_dtype.is_integer()
    assert not uint_dtype.is_signed_integer()
    assert uint_dtype.is_unsigned_integer()

    # booleans are not numeric
    bools = pl.Series("bool", [True, None, False])
    assert not bools.dtype.is_numeric()

    # strings
    text = pl.Series("s", ["testing..."])
    assert text.dtype == pl.String
    assert text.dtype != pl.Boolean

    # decimals are numeric but not float
    decimals = pl.Series("s", [], dtype=pl.Decimal(20, 15))
    assert not decimals.dtype.is_float()
    assert decimals.dtype.is_numeric()
    assert decimals.is_empty()

    # time-zone aware datetimes are temporal
    stamps = pl.Series("s", [], dtype=pl.Datetime("ms", time_zone="UTC"))
    assert stamps.dtype.is_temporal()
462
463
464
def test_series_head_tail_limit() -> None:
    """Check head/limit/tail with in-range, oversized and negative counts."""
    ten = pl.Series(range(10))

    assert_series_equal(ten.head(5), pl.Series(range(5)))
    assert_series_equal(ten.limit(5), ten.head(5))
    assert_series_equal(ten.tail(5), pl.Series(range(5, 10)))

    # a count beyond the length must not fail
    for oversized in (ten.head(100), ten.limit(100), ten.tail(100)):
        assert oversized.len() == 10

    # negative values
    assert_series_equal(ten.head(-7), pl.Series(range(3)))
    assert ten.head(-2).len() == 8
    assert_series_equal(ten.tail(-8), pl.Series(range(8, 10)))
    assert ten.head(-6).len() == 4

    # negative values out of bounds
    for emptied in (ten.head(-12), ten.limit(-12), ten.tail(-12)):
        assert emptied.len() == 0
486
487
488
def test_filter_ops() -> None:
    """Filter 0..19 with each comparison against 1 and count the survivors."""
    nums = pl.Series("a", range(20))
    masks_and_counts = (
        (nums > 1, 18),
        (nums < 1, 1),
        (nums <= 1, 2),
        (nums >= 1, 19),
        (nums == 1, 1),
        (nums != 1, 19),
    )
    for mask, expected_len in masks_and_counts:
        assert nums.filter(mask).len() == expected_len
496
497
498
def test_cast() -> None:
    """Cast an integer series to several dtypes and check a failing cast."""
    nums = pl.Series("a", range(20))

    targets = (pl.Float32, pl.Float64, pl.Int32, pl.UInt32, pl.Datetime, pl.Date)
    for target in targets:
        assert nums.cast(target).dtype == target

    # display failed values, GH#4706
    unparsable = pl.Series(["1", "2", "3", "4", "foobar"])
    with pytest.raises(InvalidOperationError, match="foobar"):
        unparsable.cast(int)
511
512
513
@pytest.mark.parametrize(
    "test_data",
    [
        [1, None, 2],
        ["abc", None, "xyz"],
        [None, datetime.now()],
        [[1, 2], [3, 4], None],
    ],
)
def test_to_pandas(test_data: list[Any]) -> None:
    """Convert a Series with exactly one null to pandas and compare the values.

    Each parametrized `test_data` list contains a single ``None``; both the
    default conversion and the pyarrow-extension-array conversion are expected
    to keep the name, report one null, and reproduce the original values.
    """
    a = pl.Series("s", test_data)
    b = a.to_pandas()

    assert a.name == b.name
    assert b.isnull().sum() == 1

    vals_b: list[Any]
    if a.dtype == pl.List:
        # list elements are converted back with `.tolist()`; nulls stay None
        vals_b = [(None if x is None else x.tolist()) for x in b]
    else:
        # map NaN back to None so the values compare equal to the input
        vals_b = b.replace({np.nan: None}).values.tolist()  # type: ignore[dict-item]

    assert vals_b == test_data

    # Only the conversion itself is guarded: keeping the assertions outside the
    # `try` prevents an unrelated ModuleNotFoundError raised while checking the
    # result from being silently swallowed.
    try:
        c = a.to_pandas(use_pyarrow_extension_array=True)
    except ModuleNotFoundError:
        # Skip this part if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.
        return

    assert a.name == c.name
    assert c.isnull().sum() == 1
    vals_c = [None if x is pd.NA else x for x in c.tolist()]
    assert vals_c == test_data
546
547
548
def test_series_to_list() -> None:
    """`to_list` returns a Python list and keeps nulls as None."""
    as_list = pl.Series("a", range(20)).to_list()
    assert isinstance(as_list, list)
    assert len(as_list) == 20

    with_null = pl.Series("a", [1, None, 2])
    assert with_null.null_count() == 1
    assert with_null.to_list() == [1, None, 2]
557
558
559
@pytest.mark.may_fail_cloud  # reason: list.to_struct is an eager operation
def test_to_struct() -> None:
    """Check `list.to_struct` field naming: default, callable, explicit."""
    digits = pl.Series("nums", ["12 34", "56 78", "90 00"]).str.extract_all(r"\d+")

    # default field names
    default_fields = digits.list.to_struct().struct.fields
    assert default_fields == ["field_0", "field_1"]

    # field names produced by a callable taking the field index
    formatted = digits.list.to_struct(fields=lambda idx: f"n{idx:02}")
    assert formatted.struct.fields == ["n00", "n01"]

    # explicit field names
    unnested = digits.list.to_struct(fields=["one", "two"]).struct.unnest()
    expected = pl.DataFrame({"one": ["12", "56", "90"], "two": ["34", "78", "00"]})
    assert_frame_equal(unnested, expected)
572
573
574
def test_to_struct_empty() -> None:
    """Unnesting a struct built with no fields yields a (0, 0) frame."""
    frame = pl.DataFrame({"y": [[], [], []]}, schema={"y": pl.List(pl.Int64)})
    no_fields = pl.col("y").list.to_struct(fields=[]).struct.unnest()
    result = frame.select(no_fields)
    assert result.shape == (0, 0)
578
579
580
def test_sort() -> None:
    """Check ascending and descending sort."""
    unsorted = pl.Series("a", [2, 1, 3])

    ascending = unsorted.sort()
    assert_series_equal(ascending, pl.Series("a", [1, 2, 3]))

    descending = unsorted.sort(descending=True)
    assert_series_equal(descending, pl.Series("a", [3, 2, 1]))
584
585
586
def test_rechunk() -> None:
    """After an append there are two chunks; rechunk merges them into one."""
    target = pl.Series("a", [1, 2, 3])
    extra = pl.Series("b", [4, 5, 6])
    target.append(extra)
    assert target.n_chunks() == 2

    # out-of-place: the returned series has a single chunk
    merged = target.rechunk(in_place=False)
    assert merged.n_chunks() == 1

    # in-place: the series itself ends up with a single chunk
    target.rechunk(in_place=True)
    assert target.n_chunks() == 1
594
595
596
def test_indexing() -> None:
    """Integer indexing returns Python scalars, and None for nulls."""
    ints = pl.Series("a", [1, 2, None])
    assert ints[1] == 2
    assert ints[2] is None

    bools = pl.Series("b", [True, False])
    assert bools[0]
    assert not bools[1]

    strings = pl.Series("a", ["a", None])
    assert strings[0] == "a"
    assert strings[1] is None

    floats = pl.Series("a", [0.1, None])
    assert floats[0] == 0.1
    assert floats[1] is None
609
610
611
def test_arrow() -> None:
    """Convert several dtypes with `to_arrow` and read them back."""
    ints = pl.Series("a", [1, 2, 3, None])
    assert ints.to_arrow() == pa.array([1, 2, 3, None])

    floats = pl.Series("b", [1.0, 2.0, 3.0, None])
    assert floats.to_arrow() == pa.array([1.0, 2.0, 3.0, None])

    # strings are compared against a large_string array
    strings = pl.Series("c", ["A", "BB", "CCC", None])
    string_arr = strings.to_arrow()
    assert string_arr == pa.array(["A", "BB", "CCC", None], type=pa.large_string())
    assert_series_equal(pl.from_arrow(string_arr), strings.rename(""))  # type: ignore[arg-type]

    # a column taken from an arrow table keeps its name
    column = strings.to_frame().to_arrow()["c"]
    assert isinstance(column, (pa.Array, pa.ChunkedArray))
    assert_series_equal(pl.from_arrow(column), strings)  # type: ignore[arg-type]
    assert_series_equal(pl.from_arrow(column, schema=["x"]), strings.rename("x"))  # type: ignore[arg-type]

    nulls = pl.Series("d", [None, None, None], pl.Null)
    assert nulls.to_arrow() == pa.nulls(3)

    list_arr = pa.array([["foo"], ["foo", "bar"]], pa.list_(pa.utf8()))
    s = cast("pl.Series", pl.from_arrow(list_arr))
    assert s.dtype == pl.List
639
640
641
def test_arrow_cat() -> None:
    """Categorical dtype from dictionary-encoded (possibly empty) arrow arrays."""
    dict_arr = pa.array(["foo", "bar"], pa.dictionary(pa.int32(), pa.utf8()))
    assert_series_equal(
        pl.Series("arr", ["foo", "bar"], pl.Categorical), pl.Series("arr", dict_arr)
    )

    encoded = pa.array(["xxx", "xxx", None, "yyy"]).dictionary_encode()
    empty_chunked = pa.chunked_array([], encoded.type)
    empty_typed = pa.array([], encoded.type)
    empty_untyped = pa.array([]).dictionary_encode()

    assert_series_equal(
        pl.Series("arr", ["xxx", "xxx", None, "yyy"], dtype=pl.Categorical),
        pl.Series("arr", encoded),
    )

    # empty arrays that carry the dictionary type are expected to be Categorical
    for empty in (empty_chunked, empty_typed):
        assert_series_equal(
            pl.Series("arr", [], dtype=pl.Categorical), pl.Series("arr", empty)
        )

    # the empty array created without an explicit type is expected to be Null
    assert_series_equal(
        pl.Series("arr", [], dtype=pl.Null), pl.Series("arr", empty_untyped)
    )
661
662
663
def test_pycapsule_interface() -> None:
    """Export a Series to pyarrow through a PyCapsuleStreamHolder."""
    series = pl.Series("a", [1, 2, 3, None])
    holder = PyCapsuleStreamHolder(series)
    chunked = pa.chunked_array(holder)
    combined = chunked.combine_chunks()
    assert combined == pa.array([1, 2, 3, None])
668
669
670
def test_get() -> None:
    """Check `__getitem__` with ints, slices, ranges, lists, series and numpy."""
    a = pl.Series("a", [1, 2, 3])
    pos_idxs = pl.Series("idxs", [2, 0, 1, 0], dtype=pl.Int8)
    neg_and_pos_idxs = pl.Series(
        "neg_and_pos_idxs", [-2, 1, 0, -1, 2, -3], dtype=pl.Int8
    )
    empty_idxs = pl.Series("idxs", [], dtype=pl.Int8)
    empty_ints: list[int] = []

    # plain Python keys
    assert a[0] == 1
    assert a[:2].to_list() == [1, 2]
    assert a[range(1)].to_list() == [1]
    assert a[range(0, 4, 2)].to_list() == [1, 3]
    assert a[:0].to_list() == []
    assert a[empty_ints].to_list() == []
    assert a[neg_and_pos_idxs.to_list()].to_list() == [2, 2, 1, 3, 3, 1]

    # non-negative indices, as a Series or numpy array of any integer dtype
    signed_dtypes = (pl.Int8, pl.Int16, pl.Int32, pl.Int64)
    unsigned_dtypes = (pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)
    for dtype in (*unsigned_dtypes, *signed_dtypes):
        typed_idxs = pos_idxs.cast(dtype)
        typed_empty = empty_idxs.cast(dtype)
        assert a[typed_idxs].to_list() == [3, 1, 2, 1]
        assert a[typed_idxs.to_numpy()].to_list() == [3, 1, 2, 1]
        assert a[typed_empty].to_list() == []
        assert a[typed_empty.to_numpy()].to_list() == []

    # negative indices only make sense for the signed dtypes
    for dtype in signed_dtypes:
        nps = a[neg_and_pos_idxs.cast(dtype).to_numpy()]
        assert nps.to_list() == [2, 2, 1, 3, 3, 1]
703
704
705
def test_set() -> None:
    """Assign a scalar through a boolean-mask Series key."""
    target = pl.Series("a", [True, False, True])
    selector = pl.Series("msk", [True, False, True])
    target[selector] = False
    all_false = pl.Series("a", [False] * 3)
    assert_series_equal(target, all_false)
710
711
712
def test_set_value_as_list_fail() -> None:
    """Assigning a list of values works for integers, not for str/bool."""
    # only allowed for numerical physical types
    ints = pl.Series("a", [1, 2, 3])
    ints[[0, 2]] = [4, 5]
    assert ints.to_list() == [4, 2, 5]

    # for other types it is not allowed
    strings = pl.Series("a", ["a", "b", "c"])
    with pytest.raises(TypeError):
        strings[[0, 1]] = ["d", "e"]

    bools = pl.Series("a", [True, False, False])
    with pytest.raises(TypeError):
        bools[[0, 1]] = [True, False]
726
727
728
@pytest.mark.parametrize("key", [True, False, 1.0])
def test_set_invalid_key(key: Any) -> None:
    """A bool or float scalar key must be rejected with TypeError."""
    target = pl.Series("a", [1, 2, 3])
    with pytest.raises(TypeError):
        target[key] = 1
733
734
735
@pytest.mark.parametrize(
    "key",
    [
        pl.Series([False, True, True]),
        pl.Series([1, 2], dtype=UInt32),
        pl.Series([1, 2], dtype=UInt64),
    ],
)
def test_set_key_series(key: pl.Series) -> None:
    """Assign through a Series key; only UInt32/UInt64/bool are allowed."""
    target = pl.Series("a", [1, 2, 3])
    target[key] = 4
    # every parametrized key selects positions 1 and 2
    expected = pl.Series("a", [1, 4, 4])
    assert_series_equal(target, expected)
748
749
750
def test_set_np_array_boolean_mask() -> None:
    """Assign a scalar through a numpy boolean mask."""
    target = pl.Series("a", [1, 2, 3])
    selector = np.array([True, False, True])
    target[selector] = 4
    expected = pl.Series("a", [4, 2, 4])
    assert_series_equal(target, expected)
755
756
757
@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.uint32, np.uint64])
def test_set_np_array(dtype: Any) -> None:
    """Assign a scalar through a numpy integer index array of each dtype."""
    target = pl.Series("a", [1, 2, 3])
    positions = np.array([0, 2], dtype=dtype)
    target[positions] = 4
    expected = pl.Series("a", [4, 2, 4])
    assert_series_equal(target, expected)
763
764
765
@pytest.mark.parametrize("idx", [[0, 2], (0, 2)])
def test_set_list_and_tuple(idx: list[int] | tuple[int]) -> None:
    """Assign a scalar through a list or tuple of positions."""
    target = pl.Series("a", [1, 2, 3])
    target[idx] = 4
    expected = pl.Series("a", [4, 2, 4])
    assert_series_equal(target, expected)
770
771
772
def test_init_nested_tuple() -> None:
    """Construct Series from flat and nested tuples."""
    flat = pl.Series("s", (1, 2, 3))
    assert flat.to_list() == [1, 2, 3]

    # a single nested tuple with an explicit List(UInt8) dtype
    u8_list = pl.List(pl.UInt8)
    single = pl.Series("s", ((1, 2, 3),), dtype=u8_list)
    assert single.to_list() == [[1, 2, 3]]
    assert single.dtype == pl.List(pl.UInt8)

    # two nested tuples with an explicit List(Int32) dtype
    i32_list = pl.List(pl.Int32)
    double = pl.Series("s", ((1, 2, 3), (1, 2, 3)), dtype=i32_list)
    assert double.to_list() == [[1, 2, 3], [1, 2, 3]]
    assert double.dtype == pl.List(pl.Int32)
783
784
785
def test_fill_null() -> None:
    """Check `fill_null` values and strategies on Series and DataFrames."""
    s = pl.Series("a", [1, 2, None])
    assert_series_equal(s.fill_null(strategy="forward"), pl.Series("a", [1, 2, 2]))
    assert_series_equal(s.fill_null(14), pl.Series("a", [1, 2, 14], dtype=Int64))

    # float series: literal fill, then every strategy
    floats = pl.Series("a", [0.0, 1.0, None, 2.0, None, 3.0])

    assert floats.fill_null(0).to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]
    float_expectations = {
        "zero": [0.0, 1.0, 0.0, 2.0, 0.0, 3.0],
        "max": [0.0, 1.0, 3.0, 2.0, 3.0, 3.0],
        "min": [0.0, 1.0, 0.0, 2.0, 0.0, 3.0],
        "one": [0.0, 1.0, 1.0, 2.0, 1.0, 3.0],
        "forward": [0.0, 1.0, 1.0, 2.0, 2.0, 3.0],
        "backward": [0.0, 1.0, 2.0, 2.0, 3.0, 3.0],
        "mean": [0.0, 1.0, 1.5, 2.0, 1.5, 3.0],
    }
    for strategy, expected in float_expectations.items():
        assert floats.fill_null(strategy=strategy).to_list() == expected
    assert floats.forward_fill().to_list() == [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]
    assert floats.backward_fill().to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]

    # string series
    strings = pl.Series("b", ["a", None, "c", None, "e"])
    string_expectations = {
        "min": ["a", "a", "c", "a", "e"],
        "max": ["a", "e", "c", "e", "e"],
        "zero": ["a", "", "c", "", "e"],
        "forward": ["a", "a", "c", "c", "e"],
        "backward": ["a", "c", "c", "e", "e"],
    }
    for strategy, expected in string_expectations.items():
        assert strings.fill_null(strategy=strategy).to_list() == expected

    # binary series
    binaries = pl.Series("c", [b"a", None, b"c", None, b"e"])
    binary_expectations = {
        "min": [b"a", b"a", b"c", b"a", b"e"],
        "max": [b"a", b"e", b"c", b"e", b"e"],
        "zero": [b"a", b"", b"c", b"", b"e"],
        "forward": [b"a", b"a", b"c", b"c", b"e"],
        "backward": [b"a", b"c", b"c", b"e", b"e"],
    }
    for strategy, expected in binary_expectations.items():
        assert binaries.fill_null(strategy=strategy).to_list() == expected

    # frame-level fill with and without supertype matching
    df = pl.DataFrame(
        [
            pl.Series("i32", [1, 2, None], dtype=pl.Int32),
            pl.Series("i64", [1, 2, None], dtype=pl.Int64),
            pl.Series("f32", [1, 2, None], dtype=pl.Float32),
            pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),
            pl.Series("str", ["a", "b", None], dtype=pl.String),
            pl.Series("bool", [True, True, None], dtype=pl.Boolean),
        ]
    )

    strict = df.fill_null(0, matches_supertype=False).fill_null("bar").fill_null(False)
    assert strict.to_dict(as_series=False) == {
        "i32": [1, 2, None],
        "i64": [1, 2, 0],
        "f32": [1.0, 2.0, None],
        "cat": ["a", "b", "bar"],
        "str": ["a", "b", "bar"],
        "bool": [True, True, False],
    }

    relaxed = df.fill_null(0, matches_supertype=True).fill_null("bar").fill_null(False)
    assert relaxed.to_dict(as_series=False) == {
        "i32": [1, 2, 0],
        "i64": [1, 2, 0],
        "f32": [1.0, 2.0, 0.0],
        "cat": ["a", "b", "bar"],
        "str": ["a", "b", "bar"],
        "bool": [True, True, False],
    }

    # unsigned integer columns must keep their dtype after the fill
    df = pl.DataFrame({"a": [1, None, 2, None]})

    unsigned_columns = (
        pl.col("a").cast(pl.UInt8).alias("u8"),
        pl.col("a").cast(pl.UInt16).alias("u16"),
        pl.col("a").cast(pl.UInt32).alias("u32"),
        pl.col("a").cast(pl.UInt64).alias("u64"),
    )
    out = df.with_columns(*unsigned_columns).fill_null(3)

    assert out.to_dict(as_series=False) == {
        "a": [1, 3, 2, 3],
        "u8": [1, 3, 2, 3],
        "u16": [1, 3, 2, 3],
        "u32": [1, 3, 2, 3],
        "u64": [1, 3, 2, 3],
    }
    assert out.dtypes == [pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64]
866
867
868
def test_str_series_min_max_10674() -> None:
    """String min/max must skip nulls, also after sorting."""
    strings = pl.Series("b", ["a", None, "c", None, "e"], dtype=pl.String)
    assert strings.min() == "a"
    assert strings.max() == "e"

    ascending = strings.sort(descending=False)
    assert ascending.min() == "a"

    descending = strings.sort(descending=True)
    assert descending.max() == "e"
874
875
876
def test_fill_nan() -> None:
    """`fill_nan` replaces NaN with None or with a literal value."""
    nan = float("nan")
    with_nans = pl.Series("a", [1.0, nan, 2.0, nan, 3.0])

    as_nulls = with_nans.fill_nan(None)
    assert_series_equal(as_nulls, pl.Series("a", [1.0, None, 2.0, None, 3.0]))

    as_zeros = with_nans.fill_nan(0)
    assert_series_equal(as_zeros, pl.Series("a", [1.0, 0.0, 2.0, 0.0, 3.0]))
881
882
883
def test_map_elements() -> None:
    """Check `map_elements` on int, string, Datetime and Date series."""
    ints = pl.Series("a", [1, 2, None])
    with pytest.warns(PolarsInefficientMapWarning):
        squared = ints.map_elements(lambda x: x**2, return_dtype=pl.Int64)
    assert list(squared) == [1, 4, None]

    words = pl.Series("a", ["foo", "bar", None])
    with pytest.warns(PolarsInefficientMapWarning):
        suffixed = words.map_elements(lambda x: x + "py", return_dtype=pl.String)
    assert list(suffixed) == ["foopy", "barpy", None]

    # with an explicit return dtype
    lengths = words.map_elements(lambda x: len(x), return_dtype=pl.Int32)
    assert list(lengths) == [3, 3, None]

    # without a return dtype
    lengths = words.map_elements(lambda x: len(x))
    assert list(lengths) == [3, 3, None]

    # just check that it runs (somehow problem with conditional compilation)
    for temporal_dtype in (pl.Datetime, pl.Date):
        temporal = pl.Series("a", [2, 2, 3]).cast(temporal_dtype)
        temporal.map_elements(lambda x: x)
905
906
907
def test_shape() -> None:
    """A three-element series reports shape (3,)."""
    three = pl.Series([1, 2, 3])
    expected_shape = (3,)
    assert three.shape == expected_shape
910
911
912
@pytest.mark.parametrize("arrow_available", [True, False])
def test_create_list_series(
    arrow_available: bool, plmonkeypatch: PlMonkeyPatch
) -> None:
    """Nested lists with nulls round-trip for either `_PYARROW_AVAILABLE` value."""
    plmonkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", arrow_available)
    nested = [[1, 2], None, [None, 3]]
    round_tripped = pl.Series("", nested).to_list()
    assert round_tripped == nested
920
921
922
def test_iter() -> None:
    """Iterating a Series yields its elements in order."""
    numbers = pl.Series("", [1, 2, 3])

    iterator = iter(numbers)
    assert next(iterator) == 1
    assert next(iterator) == 2
    assert next(iterator) == 3
    # the series can also be consumed directly by builtins
    assert sum(numbers) == 6
930
931
932
def test_empty() -> None:
    """Check dtype and emptiness of the various ways to build an empty Series."""
    # only a dtype given
    typed = pl.Series(dtype=pl.Int8)
    assert typed.dtype == pl.Int8
    assert typed.is_empty()

    # nothing given at all
    bare = pl.Series()
    assert bare.dtype == pl.Null
    assert bare.is_empty()

    # a name and an empty list
    named = pl.Series("name", [])
    assert named.dtype == pl.Null
    assert named.is_empty()

    # an empty tuple plus a dtype
    from_tuple = pl.Series(values=(), dtype=pl.Int8)
    assert from_tuple.dtype == pl.Int8
    assert from_tuple.is_empty()

    assert_series_equal(pl.Series(), pl.Series())
    assert_series_equal(
        pl.Series(dtype=pl.Int32), pl.Series(dtype=pl.Int64), check_dtypes=False
    )

    # the truth value of a Series is rejected as ambiguous
    with pytest.raises(TypeError, match="ambiguous"):
        not pl.Series()
956
957
958
def test_round() -> None:
    """Round floats to two decimals and with the default precision."""
    floats = pl.Series("f", [1.003, 2.003])

    two_decimals = floats.round(2)
    assert two_decimals.to_list() == [1.00, 2.00]

    default_precision = floats.round()
    assert default_precision.to_list() == [1.0, 2.0]
965
966
967
def test_round_int() -> None:
    """Rounding an integer series leaves it unchanged."""
    ints = pl.Series([1, 2, 3])
    rounded = ints.round()
    assert_series_equal(ints, rounded)
970
971
972
@pytest.mark.parametrize(
    ("series", "digits", "expected_result"),
    [
        pytest.param(pl.Series([1.234, 0.1234]), 2, pl.Series([1.2, 0.12]), id="f64"),
        pytest.param(
            pl.Series([1.234, 0.1234]).cast(pl.Float32),
            2,
            pl.Series([1.2, 0.12]).cast(pl.Float32),
            id="f32",
        ),
        pytest.param(pl.Series([123400, 1234]), 2, pl.Series([120000, 1200]), id="i64"),
        pytest.param(
            pl.Series([123400, 1234]).cast(pl.Int32),
            2,
            pl.Series([120000, 1200]).cast(pl.Int32),
            id="i32",
        ),
        pytest.param(
            pl.Series([0.0]), 2, pl.Series([0.0]), id="0 should remain the same"
        ),
    ],
)
def test_round_sig_figs(
    series: pl.Series, digits: int, expected_result: pl.Series
) -> None:
    """Round each parametrized series to `digits` significant figures."""
    rounded = series.round_sig_figs(digits=digits)
    assert_series_equal(rounded, expected_result)
999
1000
1001
def test_round_sig_figs_raises_exc() -> None:
    """Asking for zero significant digits is rejected."""
    floats = pl.Series([1.234, 0.1234])
    with pytest.raises(pl.exceptions.InvalidOperationError):
        floats.round_sig_figs(digits=0)
1004
1005
1006
def test_apply_list_out() -> None:
    """`map_elements` may return one Series per element."""
    counts = pl.Series("count", [3, 2, 2])
    repeated = counts.map_elements(lambda val: pl.repeat(val, val, eager=True))

    # each value `v` is expected to expand into `v` copies of itself
    expected_rows = [[3, 3, 3], [2, 2], [2, 2]]
    for idx, expected_row in enumerate(expected_rows):
        assert repeated[idx].to_list() == expected_row
1012
1013
1014
def test_reinterpret() -> None:
    """Reinterpreting UInt64 as signed yields Int64 (Series and expression)."""
    unsigned = pl.Series("a", [1, 1, 2], dtype=pl.UInt64)
    assert unsigned.reinterpret(signed=True).dtype == pl.Int64

    frame = pl.DataFrame([unsigned])
    reinterpreted = frame.select([pl.col("a").reinterpret(signed=True)])
    assert reinterpreted["a"].dtype == pl.Int64
1019
1020
1021
def test_mode() -> None:
    """`mode` returns the most frequent value(s) for several dtypes."""
    ints = pl.Series("a", [1, 1, 2])
    assert ints.mode().to_list() == [1]
    assert ints.set_sorted().mode().to_list() == [1]

    frame = pl.DataFrame([ints])
    assert frame.select([pl.col("a").mode()])["a"].to_list() == [1]

    categorical = pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical)
    assert categorical.mode().item() == "bar"
    assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.0
    assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b"

    # sorted data
    distinct_values = pl.int_range(0, 3, eager=True)
    assert set(distinct_values.mode().to_list()) == {0, 1, 2}
1037
1038
1039
def test_diff() -> None:
    """`diff` keeps a leading null by default and can drop it on request."""
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    with_leading_null = pl.Series("a", [None, 1, 1, -1, 0, 1, -3])
    assert_series_equal(values.diff(), with_leading_null)

    nulls_dropped = pl.Series("a", [1, 1, -1, 0, 1, -3])
    assert_series_equal(values.diff(null_behavior="drop"), nulls_dropped)
1050
1051
1052
def test_diff_negative() -> None:
    """A negative `diff` offset puts the null at the end instead."""
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    with_trailing_null = pl.Series("a", [-1, -1, 1, 0, -1, 3, None])
    assert_series_equal(values.diff(-1), with_trailing_null)

    nulls_dropped = pl.Series("a", [-1, -1, 1, 0, -1, 3])
    assert_series_equal(values.diff(-1, null_behavior="drop"), nulls_dropped)
1063
1064
1065
def test_skew() -> None:
    """Skewness matches reference values with and without bias correction."""
    biased_reference = -0.5953924651018018
    unbiased_reference = -0.7717168360221258
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    assert values.skew(bias=True) == pytest.approx(biased_reference)
    assert values.skew(bias=False) == pytest.approx(unbiased_reference)

    # the expression API has to agree with the Series method
    frame = pl.DataFrame([values])
    from_expr = frame.select(pl.col("a").skew(bias=False))["a"][0]
    assert np.isclose(from_expr, unbiased_reference)
1075
1076
1077
def test_kurtosis() -> None:
    """Kurtosis matches a reference value via Series and expression APIs."""
    reference = -0.6406250000000004
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    assert values.kurtosis() == pytest.approx(reference)

    frame = pl.DataFrame([values])
    from_expr = frame.select(pl.col("a").kurtosis())["a"][0]
    assert np.isclose(from_expr, reference)
1084
1085
1086
def test_sqrt() -> None:
    """`sqrt` agrees with NumPy for both the Series and expression APIs."""
    values = pl.Series("a", [1, 2])
    expected = pl.Series("a", [1.0, np.sqrt(2)])

    assert_series_equal(values.sqrt(), expected)

    from_expr = pl.DataFrame([values]).select(pl.col("a").sqrt())["a"]
    assert_series_equal(from_expr, expected)
1093
1094
1095
def test_cbrt() -> None:
    """`cbrt` agrees with NumPy for both the Series and expression APIs."""
    values = pl.Series("a", [1, 2])
    expected = pl.Series("a", [1.0, np.cbrt(2)])

    assert_series_equal(values.cbrt(), expected)

    from_expr = pl.DataFrame([values]).select(pl.col("a").cbrt())["a"]
    assert_series_equal(from_expr, expected)
1102
1103
1104
def test_range() -> None:
    """Python `range` objects work both as an index and as Series values."""
    flat = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    window = range(2, 5)
    assert_series_equal(flat[2:5], flat[window])

    ranges = [range(-2, 1), range(3), range(2, 8, 2)]
    ranges_as_lists = [[-2, -1, 0], [0, 1, 2], [2, 4, 6]]

    # ranges as list elements, with an explicit inner dtype
    typed = pl.Series("b", ranges, dtype=pl.List(pl.Int8))
    assert typed.to_list() == ranges_as_lists
    assert typed.dtype == pl.List(pl.Int8)
    assert typed.name == "b"

    # a generator of lists of ranges, with the dtype inferred
    nested = pl.Series("c", (ranges for _ in range(3)))
    assert nested.to_list() == [ranges_as_lists] * 3
    assert nested.dtype == pl.List(pl.List(pl.Int64))

    frame = pl.DataFrame([flat])
    assert_frame_equal(frame[2:5], frame[window])
1125
1126
1127
def test_strict_cast() -> None:
    """A strict cast of 2**16 to Int16 raises for Series and expressions."""
    too_large = 2**16

    with pytest.raises(InvalidOperationError):
        pl.Series("a", [too_large]).cast(dtype=pl.Int16, strict=True)

    with pytest.raises(InvalidOperationError):
        pl.DataFrame({"a": [too_large]}).select(
            [pl.col("a").cast(pl.Int16, strict=True)]
        )
1132
1133
1134
def test_floor_divide() -> None:
    """`//` floors the quotient for both Series and expressions."""
    values = pl.Series("a", [1, 2, 3])
    expected = pl.Series("a", [0, 1, 1])

    assert_series_equal(values // 2, expected)

    from_expr = pl.DataFrame([values]).select(pl.col("a") // 2)["a"]
    assert_series_equal(from_expr, expected)
1140
1141
1142
def test_true_divide() -> None:
    """`/` produces floats for Series, expressions and reflected operands."""
    values = pl.Series("a", [1, 2])
    halves = pl.Series("a", [0.5, 1.0])
    frame = pl.DataFrame([values])

    assert_series_equal(values / 2, halves)
    assert_series_equal(frame.select(pl.col("a") / 2)["a"], halves)

    # rtruediv
    reflected = frame.select(2 / pl.col("a"))["literal"]
    assert_series_equal(reflected, pl.Series("literal", [2.0, 1.0]))

    # https://github.com/pola-rs/polars/issues/1369
    vals = [3000000000, 2, 3]
    unnamed = pl.Series(vals)
    assert_series_equal(unnamed / 1, pl.Series(vals, dtype=Float64))

    divided_column = pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"]
    assert_series_equal(divided_column, pl.Series("a", vals, dtype=Float64))
1163
1164
1165
def test_bitwise() -> None:
    """Bitwise operators work; logical `and`/`or` on Series raise."""
    lhs = pl.Series("a", [1, 2, 3])
    rhs = pl.Series("b", [3, 4, 5])
    expected = {"and": [1, 0, 1], "or": [3, 6, 7], "xor": [2, 6, 6]}

    # Series operators: the result carries the name "a"
    assert_series_equal(lhs & rhs, pl.Series("a", expected["and"]))
    assert_series_equal(lhs | rhs, pl.Series("a", expected["or"]))
    assert_series_equal(lhs ^ rhs, pl.Series("a", expected["xor"]))

    # the same operators through the expression API
    col_a, col_b = pl.col("a"), pl.col("b")
    out = pl.DataFrame([lhs, rhs]).select(
        (col_a & col_b).alias("and"),
        (col_a | col_b).alias("or"),
        (col_a ^ col_b).alias("xor"),
    )
    for column_name, values in expected.items():
        assert_series_equal(out[column_name], pl.Series(column_name, values))

    # ensure mistaken use of logical 'and'/'or' raises an exception
    with pytest.raises(TypeError, match="ambiguous"):
        lhs and rhs  # type: ignore[redundant-expr]

    with pytest.raises(TypeError, match="ambiguous"):
        lhs or rhs  # type: ignore[redundant-expr]
1188
1189
1190
def test_from_generator_or_iterable() -> None:
    """Series can be built from generators and from iterable objects."""

    # generator function
    def count_up(limit: int) -> Iterator[int]:
        for value in range(limit):
            yield value

    # iterable object
    class Countable:
        def __init__(self, limit: int) -> None:
            self._limit = limit

        def __iter__(self) -> Iterator[int]:
            return count_up(self._limit)

    int64_expected = pl.Series("s", range(10))
    assert int64_expected.dtype == pl.Int64

    sources = (
        count_up(10),
        Countable(10),
        (x for x in count_up(10)),
    )
    for source in sources:
        assert_series_equal(int64_expected, pl.Series("s", values=source))

    # test 'iterable_to_pyseries' directly to validate 'chunk_size' behaviour
    pyseries_variants = (
        iterable_to_pyseries("s", count_up(10), dtype=pl.UInt8),
        iterable_to_pyseries("s", count_up(10), dtype=pl.UInt8, chunk_size=3),
        iterable_to_pyseries("s", Countable(10), dtype=pl.UInt8, chunk_size=6),
    )

    uint8_expected = pl.Series("s", range(10), dtype=pl.UInt8)
    assert uint8_expected.dtype == pl.UInt8

    for pyseries in pyseries_variants:
        # wrap the raw PySeries in a Series so it can be compared
        wrapped = pl.Series("s")
        wrapped._s = pyseries
        assert_series_equal(uint8_expected, wrapped)

    # empty generator
    assert_series_equal(pl.Series("s", []), pl.Series("s", values=count_up(0)))
1228
1229
1230
def test_from_sequences(plmonkeypatch: PlMonkeyPatch) -> None:
    """Nested sequences build the same Series with and without pyarrow."""
    # test int, str, bool, flt
    values = [
        [[1], [None, 3]],
        [["foo"], [None, "bar"]],
        [[True], [None, False]],
        [[1.0], [None, 3.0]],
    ]

    for vals in values:
        built = []
        # construct once per setting of the pyarrow-availability flag
        for pyarrow_available in (False, True):
            plmonkeypatch.setattr(
                pl.series.series, "_PYARROW_AVAILABLE", pyarrow_available
            )
            built.append(pl.Series("a", vals))

        without_pyarrow, with_pyarrow = built
        assert_series_equal(without_pyarrow, with_pyarrow)
        assert without_pyarrow.to_list() == vals
1246
1247
1248
def test_comparisons_int_series_to_float() -> None:
    """Arithmetic and comparisons between an integer Series and scalars."""
    srs_int = pl.Series([1, 2, 3, 4])

    # (operation result, expected values)
    checks = [
        (srs_int - 1.0, [0.0, 1.0, 2.0, 3.0]),
        (srs_int + 1.0, [2.0, 3.0, 4.0, 5.0]),
        (srs_int * 2.0, [2.0, 4.0, 6.0, 8.0]),
        (srs_int / 2.0, [0.5, 1.0, 1.5, 2.0]),
        (srs_int % 2.0, [1.0, 0.0, 1.0, 0.0]),
        (4.0 % srs_int, [0.0, 0.0, 1.0, 0.0]),
        (srs_int // 2.0, [0.0, 1.0, 1.0, 2.0]),
        (srs_int < 3.0, [True, True, False, False]),
        (srs_int <= 3.0, [True, True, True, False]),
        (srs_int > 3.0, [False, False, False, True]),
        (srs_int >= 3.0, [False, False, True, True]),
        (srs_int == 3.0, [False, False, True, False]),
        (srs_int - True, [0, 1, 2, 3]),
    ]
    for result, expected_values in checks:
        assert_series_equal(result, pl.Series(expected_values))
1265
1266
1267
def test_comparisons_int_series_to_float_scalar() -> None:
    """Comparing an integer Series with a non-integral float scalar."""
    srs_int = pl.Series([1, 2, 3, 4])

    below = pl.Series([True, False, False, False])
    above = pl.Series([False, True, True, True])

    assert_series_equal(srs_int < 1.5, below)
    assert_series_equal(srs_int > 1.5, above)
1272
1273
1274
def test_comparisons_datetime_series_to_date_scalar() -> None:
    """A Series built from dates can be compared against a datetime scalar."""
    srs_date = pl.Series([date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 3)])
    noon_first_day = datetime(2023, 1, 1, 12, 0, 0)

    earlier = pl.Series([True, False, False])
    later = pl.Series([False, True, True])

    assert_series_equal(srs_date < noon_first_day, earlier)
    assert_series_equal(srs_date > noon_first_day, later)
1280
1281
1282
def test_comparisons_float_series_to_int() -> None:
    """Arithmetic and comparisons between a float Series and int scalars."""
    srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])

    # (operation result, expected values)
    checks = [
        (srs_float - 1, [0.0, 1.0, 2.0, 3.0]),
        (srs_float + 1, [2.0, 3.0, 4.0, 5.0]),
        (srs_float * 2, [2.0, 4.0, 6.0, 8.0]),
        (srs_float / 2, [0.5, 1.0, 1.5, 2.0]),
        (srs_float % 2, [1.0, 0.0, 1.0, 0.0]),
        (4 % srs_float, [0.0, 0.0, 1.0, 0.0]),
        (srs_float // 2, [0.0, 1.0, 1.0, 2.0]),
        (srs_float < 3, [True, True, False, False]),
        (srs_float <= 3, [True, True, True, False]),
        (srs_float > 3, [False, False, False, True]),
        (srs_float >= 3, [False, False, True, True]),
        (srs_float == 3, [False, False, True, False]),
        (srs_float - True, [0.0, 1.0, 2.0, 3.0]),
    ]
    for result, expected_values in checks:
        assert_series_equal(result, pl.Series(expected_values))
1299
1300
1301
def test_comparisons_bool_series_to_int() -> None:
    """Boolean Series: equality works, most arithmetic and ordering raise."""
    from operator import ge, gt, le, lt

    srs_bool = pl.Series([True, False])

    # (native bool comparison should work...)
    for t, f in ((True, False), (False, True)):
        assert list(srs_bool == t) == list(srs_bool != f) == [t, f]

    # TODO: do we want this to work?
    assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))

    # every rejected arithmetic operation reports the same message
    match = (
        r"cannot do arithmetic with Series of dtype: Boolean"
        r" and argument of type: 'bool'"
    )
    with pytest.raises(TypeError, match=match):
        srs_bool - 1
    with pytest.raises(TypeError, match=match):
        srs_bool + 1
    with pytest.raises(TypeError, match=match):
        srs_bool % 2
    with pytest.raises(TypeError, match=match):
        srs_bool * 1

    for compare in (ge, gt, le, lt):
        # "ge" -> "gt_eq", "le" -> "lt_eq"; "gt" and "lt" contain no "e"
        op_str = compare.__name__.replace("e", "t_eq")
        for scalar in (0, 1.0, True, False):
            with pytest.raises(
                NotImplementedError,
                match=rf"Series of type Boolean does not have {op_str} operator",
            ):
                compare(srs_bool, scalar)
1337
1338
1339
@pytest.mark.parametrize(
    ("values", "compare_with", "compares_equal"),
    [
        (
            [date(1999, 12, 31), date(2021, 1, 31)],
            date(2021, 1, 31),
            [False, True],
        ),
        (
            [datetime(2021, 1, 1, 12, 0, 0), datetime(2021, 1, 2, 12, 0, 0)],
            datetime(2021, 1, 1, 12, 0, 0),
            [True, False],
        ),
        (
            [timedelta(days=1), timedelta(days=2)],
            timedelta(days=1),
            [True, False],
        ),
    ],
)
def test_temporal_comparison(
    values: list[Any], compare_with: Any, compares_equal: list[bool]
) -> None:
    """Equality between a temporal Series and a matching Python scalar."""
    result = pl.Series(values) == compare_with
    expected = pl.Series(compares_equal, dtype=pl.Boolean)
    assert_series_equal(result, expected)
1366
1367
1368
@pytest.mark.parametrize(
    ("drop_nulls", "drop_first"),
    [
        (False, False),
        (False, True),
        (True, False),
        (True, True),
    ],
)
def test_to_dummies_with_nulls(drop_nulls: bool, drop_first: bool) -> None:
    """`to_dummies` on data with nulls, for each drop-flag combination."""
    s = pl.Series("s", [None, "a", "a", None, "b", "c"])

    # full indicator frame, before any column is removed
    all_columns = {
        "s_a": [0, 1, 1, 0, 0, 0],
        "s_b": [0, 0, 0, 0, 1, 0],
        "s_c": [0, 0, 0, 0, 0, 1],
        "s_null": [1, 0, 0, 1, 0, 0],
    }
    expected = pl.DataFrame(all_columns).cast(pl.UInt8)

    # each enabled flag removes one column from the expectation
    for flag, column in ((drop_nulls, "s_null"), (drop_first, "s_a")):
        if flag:
            expected = expected.drop(column)

    result = s.to_dummies(drop_nulls=drop_nulls, drop_first=drop_first)
    assert_frame_equal(result, expected)
1395
1396
1397
@pytest.mark.parametrize(
    ("drop_nulls", "drop_first"),
    [
        (False, False),
        (False, True),
        (True, False),
        (True, True),
    ],
)
def test_to_dummies_no_nulls(drop_nulls: bool, drop_first: bool) -> None:
    """`to_dummies` on null-free data, for each drop-flag combination."""
    s = pl.Series("s", ["a", "a", "b", "c"])

    all_columns = {
        "s_a": [1, 1, 0, 0],
        "s_b": [0, 0, 1, 0],
        "s_c": [0, 0, 0, 1],
    }
    full_frame = pl.DataFrame(all_columns).cast(pl.UInt8)

    # `drop_nulls` does not alter the expectation here: there is no null column
    expected = full_frame.drop("s_a") if drop_first else full_frame

    result = s.to_dummies(drop_nulls=drop_nulls, drop_first=drop_first)
    assert_frame_equal(result, expected)
1421
1422
1423
def test_to_dummies_null_clash_19096() -> None:
    """A null and the literal string "null" produce a duplicate column name."""
    clashing = pl.Series([None, "null"])
    expected_message = "column with name '_null' has more than one occurrence"

    with pytest.raises(DuplicateError, match=expected_message):
        clashing.to_dummies()
1428
1429
1430
def test_chunk_lengths() -> None:
    """A freshly built Series reports a single chunk holding every element."""
    values = [1, 2, 2, 3]
    single_chunk = pl.Series("a", values)

    # this is a Series with one chunk, of length 4
    assert single_chunk.n_chunks() == 1
    assert single_chunk.chunk_lengths() == [4]
1435
1436
1437
def test_limit() -> None:
    """`limit(n)` keeps the first `n` values."""
    values = pl.Series("a", [1, 2, 3])
    first_two = pl.Series("a", [1, 2])
    assert_series_equal(values.limit(2), first_two)
1440
1441
1442
def test_filter() -> None:
    """`filter` accepts Series, list and NumPy boolean masks."""
    values = pl.Series("a", [1, 2, 3])
    kept = pl.Series("a", [1, 3])

    masks = (
        pl.Series("", [True, False, True]),
        [True, False, True],
        np.array([True, False, True]),
    )
    for mask in masks:
        assert_series_equal(values.filter(mask), kept)

    # an integer array is not accepted as a mask
    with pytest.raises(RuntimeError, match="Expected a boolean mask"):
        values.filter(np.array([1, 0, 1]))
1452
1453
1454
def test_gather_every() -> None:
    """`gather_every` takes every n-th value, optionally from an offset."""
    values = pl.Series("a", [1, 2, 3, 4])

    from_start = values.gather_every(2)
    assert_series_equal(from_start, pl.Series("a", [1, 3]))

    from_second = values.gather_every(2, offset=1)
    assert_series_equal(from_second, pl.Series("a", [2, 4]))
1458
1459
1460
def test_arg_sort() -> None:
    """`arg_sort` returns sorting indices in the configured index dtype."""
    values = pl.Series("a", [5, 3, 4, 1, 2])
    index_dtype = pl.get_index_type()

    ascending = pl.Series("a", [3, 4, 1, 2, 0], dtype=index_dtype)
    assert_series_equal(values.arg_sort(), ascending)

    descending = pl.Series("a", [0, 2, 1, 4, 3], dtype=index_dtype)
    assert_series_equal(values.arg_sort(descending=True), descending)
1468
1469
1470
@pytest.mark.parametrize(
    ("series", "argmin", "argmax"),
    [
        # Numeric
        (pl.Series([5, 3, 4, 1, 2]), 3, 0),
        (pl.Series([None, 5, 1]), 2, 1),
        (pl.Series([float("nan"), 3.0, 5.0]), 1, 2),
        (pl.Series([None, float("nan"), 3.0, 5.0]), 2, 3),
        # Boolean
        (pl.Series([True, False]), 1, 0),
        (pl.Series([True, True]), 0, 0),
        (pl.Series([False, False]), 0, 0),
        (pl.Series([None, True, False, True]), 2, 1),
        (pl.Series([None, True, True]), 1, 1),
        (pl.Series([None, False, False]), 1, 1),
        # String
        (pl.Series(["a", "c", "b"]), 0, 1),
        (pl.Series([None, "a", None, "b"]), 1, 3),
        # Binary
        (pl.Series([b"a", b"c", b"b"]), 0, 1),
        (pl.Series([None, b"a", None, b"b"]), 1, 3),
        # Decimal
        (pl.Series([Decimal("1.1"), Decimal("2.2"), Decimal("0.5")]), 2, 1),
        (pl.Series([None, Decimal("1.1"), None, Decimal("2.2")]), 1, 3),
        # Categorical
        (pl.Series(["c", "b", "a"], dtype=pl.Categorical()), 2, 0),
        (pl.Series("s", [None, "c", "b", None, "a"], pl.Categorical()), 4, 1),
    ],
)
def test_arg_min_arg_max(series: pl.Series, argmin: int, argmax: int) -> None:
    """`arg_min`/`arg_max` return the expected positions for each dtype."""
    found_min = series.arg_min()
    assert found_min == argmin, (
        f"values: {series.to_list()}, expected {argmin} got {found_min}"
    )

    found_max = series.arg_max()
    assert found_max == argmax, (
        f"values: {series.to_list()}, expected {argmax} got {found_max}"
    )
1506
1507
1508
@pytest.mark.parametrize(
    "series",
    [
        # All nulls
        pl.Series([None, None], dtype=pl.Int32),
        pl.Series([None, None], dtype=pl.Boolean),
        pl.Series([None, None], dtype=pl.String),
        pl.Series([None, None], dtype=pl.Categorical),
        pl.Series([None, None], dtype=pl.Categorical()),
        # Empty Series
        pl.Series([], dtype=pl.Int32),
        pl.Series([], dtype=pl.Boolean),
        pl.Series([], dtype=pl.String),
        pl.Series([], dtype=pl.Categorical),
    ],
)
def test_arg_min_arg_max_all_nulls_or_empty(series: pl.Series) -> None:
    """With no non-null value, `arg_min` and `arg_max` both return None."""
    for arg_function in (series.arg_min, series.arg_max):
        assert arg_function() is None
1527
1528
1529
def test_arg_min_and_arg_max_sorted() -> None:
    """`arg_min`/`arg_max` on Series that carry a sorted flag."""
    ascending_flags = {"SORTED_ASC": True, "SORTED_DESC": False}
    descending_flags = {"SORTED_ASC": False, "SORTED_DESC": True}

    # (values, sort descending?, expected flags, expected arg_min, expected arg_max)
    cases = [
        # ascending and descending numerical series
        ([None, 1, 2, 3, 4, 5], False, ascending_flags, 1, 5),
        ([None, 5, 4, 3, 2, 1], True, descending_flags, 5, 1),
        # ascending and descending str series
        ([None, "a", "b", "c", "d", "e"], False, ascending_flags, 1, 5),
        ([None, "e", "d", "c", "b", "a"], True, descending_flags, 5, 1),
    ]
    for values, descending, expected_flags, argmin, argmax in cases:
        s = pl.Series(values)
        s.sort(descending=descending, in_place=True)  # sets the sorted flag
        assert s.flags == expected_flags
        assert s.arg_min() == argmin
        assert s.arg_max() == argmax
1553
1554
1555
def test_is_null_is_not_null() -> None:
    """`is_null` and `is_not_null` return complementary masks."""
    values = pl.Series("a", [1.0, 2.0, 3.0, None])

    null_mask = pl.Series("a", [False, False, False, True])
    not_null_mask = pl.Series("a", [True, True, True, False])

    assert_series_equal(values.is_null(), null_mask)
    assert_series_equal(values.is_not_null(), not_null_mask)
1559
1560
1561
def test_is_finite_is_infinite() -> None:
    """`is_finite` and `is_infinite` flag an infinite float correctly."""
    values = pl.Series("a", [1.0, 2.0, np.inf])

    finite_mask = pl.Series("a", [True, True, False])
    infinite_mask = pl.Series("a", [False, False, True])

    assert_series_equal(values.is_finite(), finite_mask)
    assert_series_equal(values.is_infinite(), infinite_mask)
1565
1566
1567
@pytest.mark.parametrize("float_type", [pl.Float32, pl.Float64])
def test_is_nan_is_not_nan(float_type: PolarsDataType) -> None:
    """NaN predicates and NaN fill/drop keep nulls untouched."""
    values = pl.Series([1.0, np.nan, None], dtype=float_type)

    # predicates propagate the null instead of classifying it
    assert_series_equal(values.is_nan(), pl.Series([False, True, None]))
    assert_series_equal(values.is_not_nan(), pl.Series([True, False, None]))

    filled = pl.Series([1.0, 2.0, None], dtype=float_type)
    assert_series_equal(values.fill_nan(2.0), filled)

    without_nans = pl.Series([1.0, None], dtype=float_type)
    assert_series_equal(values.drop_nans(), without_nans)
1575
1576
1577
def test_float_methods_on_ints() -> None:
    """Float-specific predicates also work on an integer Series."""
    # these float-specific methods work on non-float numeric types
    ints = pl.Series([1, None], dtype=pl.Int32)

    # (predicate result, expected values)
    checks = [
        (ints.is_finite(), [True, None]),
        (ints.is_infinite(), [False, None]),
        (ints.is_nan(), [False, None]),
        (ints.is_not_nan(), [True, None]),
    ]
    for result, expected_values in checks:
        assert_series_equal(result, pl.Series(expected_values))
1584
1585
1586
def test_dot() -> None:
    """`dot` and `@` agree with NumPy and reject mismatched lengths."""
    ints = pl.Series("a", [1, 2, 3])
    floats = pl.Series("b", [4.0, 5.0, 6.0])

    # NumPy reference for the same vectors
    assert np.array([1, 2, 3]) @ np.array([4, 5, 6]) == 32

    products = (
        ints.dot(floats),
        ints @ floats,
        [1, 2, 3] @ floats,
        ints @ np.array([4, 5, 6]),
    )
    for product in products:
        assert product == 32

    with pytest.raises(ShapeError, match="length mismatch"):
        ints @ [4, 5, 6, 7, 8]
1602
1603
1604
@pytest.mark.parametrize(
    "dtype",
    [pl.Int8, pl.Int16, pl.Int32, pl.Float32, pl.Float64],
)
def test_peak_max_peak_min(dtype: pl.DataType) -> None:
    """`peak_min`/`peak_max` mark local minima/maxima for numeric dtypes."""
    values = pl.Series("a", [4, 1, 3, 2, 5], dtype=dtype)

    minima = pl.Series("a", [False, True, False, True, False])
    assert_series_equal(values.peak_min(), minima)

    maxima = pl.Series("a", [True, False, True, False, True])
    assert_series_equal(values.peak_max(), maxima)
1618
1619
1620
def test_peak_max_peak_min_bool() -> None:
    """`peak_min`/`peak_max` on a Boolean Series."""
    flags = pl.Series("a", [False, True, False, True, True, False], dtype=pl.Boolean)

    minima = pl.Series("a", [False, False, True, False, False, False])
    assert_series_equal(flags.peak_min(), minima)

    maxima = pl.Series("a", [False, True, False, False, False, False])
    assert_series_equal(flags.peak_max(), maxima)
1629
1630
1631
def test_shrink_to_fit() -> None:
    """`shrink_to_fit` returns self when in place, a new object otherwise."""
    shrunk_in_place = pl.Series("a", [4, 1, 3, 2, 5])
    assert shrunk_in_place.shrink_to_fit(in_place=True) is shrunk_in_place

    original = pl.Series("a", [4, 1, 3, 2, 5])
    shrunk_copy = original.shrink_to_fit(in_place=False)
    assert original is not shrunk_copy
1639
1640
1641
@pytest.mark.parametrize("unit", ["ns", "us", "ms"])
def test_cast_datetime_to_time(unit: TimeUnit) -> None:
    """Casting Datetime to Time keeps the time of day for each time unit."""
    datetimes = [
        datetime(2022, 9, 7, 0, 0),
        datetime(2022, 9, 6, 12, 0),
        datetime(2022, 9, 7, 23, 59, 59),
        datetime(2022, 9, 7, 23, 59, 59, 201),
    ]
    a = pl.Series("a", datetimes, dtype=Datetime(unit))

    # NOTE: microseconds are lost for `unit=ms`
    last_time = time(23, 59, 59) if unit == "ms" else time(23, 59, 59, 201)
    expected_values = [time(0, 0), time(12, 0), time(23, 59, 59), last_time]

    expected = pl.Series("a", expected_values)
    assert_series_equal(a.cast(Time), expected)
1665
1666
1667
def test_init_categorical() -> None:
    """Constructing with dtype Categorical equals casting a String Series."""
    value_sets = ([None], ["foo", "bar"], [None, "foo", "bar"])
    for values in value_sets:
        via_cast = pl.Series("a", values, dtype=pl.String).cast(pl.Categorical)
        via_constructor = pl.Series("a", values, dtype=pl.Categorical)
        assert_series_equal(via_constructor, via_cast)
1672
1673
1674
def test_iter_nested_list() -> None:
    """Iterating a list Series yields one Series per row, in either direction."""
    rows = [[1, 2], [3, 4]]
    first_row, second_row = pl.Series([1, 2]), pl.Series([3, 4])

    forward = list(pl.Series("s", rows))
    assert_series_equal(forward[0], first_row)
    assert_series_equal(forward[1], second_row)

    backward = list(reversed(pl.Series("s", rows)))
    assert_series_equal(backward[0], second_row)
    assert_series_equal(backward[1], first_row)
1682
1683
1684
def test_iter_nested_struct() -> None:
    """Iterating a struct Series yields plain dicts, in either direction."""
    # note: this feels inconsistent with the above test for nested list, but
    # let's ensure the behaviour is codified before potentially modifying...
    first_row = {"a": 1, "b": 2}
    second_row = {"a": 3, "b": 4}

    forward = list(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}]))
    assert forward[0] == first_row
    assert forward[1] == second_row

    backward = list(reversed(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}])))
    assert backward[0] == second_row
    assert backward[1] == first_row
1694
1695
1696
@pytest.mark.parametrize(
    "dtype",
    [
        pl.UInt8,
        pl.Float32,
        pl.Int32,
        pl.Boolean,
        pl.List(pl.String),
        pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]),
    ],
)
def test_nested_list_types_preserved(dtype: pl.DataType) -> None:
    """Empty inner Series keep their dtype when nested in an outer Series."""
    empty_inner = [pl.Series([], dtype=dtype) for _ in range(5)]
    outer = pl.Series(empty_inner)
    assert all(inner.dtype == dtype for inner in outer)
1711
1712
1713
def test_to_physical() -> None:
    """`to_physical` maps logical dtypes onto their physical representation."""
    # casting an int result in an int
    ints = pl.Series("a", [1, 2, 3])
    assert_series_equal(ints.to_physical(), ints)

    # casting a date results in an Int32
    dates = pl.Series("a", [date(2020, 1, 1)] * 3)
    days_since_epoch = pl.Series("a", [18262] * 3, dtype=Int32)
    assert_series_equal(dates.to_physical(), days_since_epoch)

    # casting a categorical results in a UInt32
    categorical = pl.Series(["cat1"]).cast(pl.Categorical)
    assert categorical.to_physical().dtype == pl.UInt32

    # casting a small enum results in a UInt8
    small_enum = pl.Series(["cat1"]).cast(pl.Enum(["cat1"]))
    assert small_enum.to_physical().dtype == pl.UInt8

    # casting a List(Categorical) results in a List(UInt32)
    list_of_categorical = pl.Series([["cat1"]]).cast(pl.List(pl.Categorical))
    assert list_of_categorical.to_physical().dtype == pl.List(pl.UInt32)

    # casting a List(Enum) with a small enum results in a List(UInt8)
    list_of_enum = pl.Series(["cat1"]).cast(pl.List(pl.Enum(["cat1"])))
    assert list_of_enum.to_physical().dtype == pl.List(pl.UInt8)
1738
1739
1740
def test_to_physical_rechunked_21285() -> None:
    """`to_physical` on multi-chunk Array/List of structs with a Null field."""
    # A series with multiple chunks, dtype is array or list of structs with a
    # null field (causes rechunking) and a field with a different physical and
    # logical repr (causes the full body of `to_physical_repr` to run).
    logical_struct = pl.Struct({"f0": pl.Time, "f1": pl.Null})
    physical_struct = pl.Struct({"f0": Int64, "f1": pl.Null})

    # (logical dtype, expected physical dtype)
    cases = [
        (pl.Array(logical_struct, shape=(1,)), pl.Array(physical_struct, shape=(1,))),
        (pl.List(logical_struct), pl.List(physical_struct)),
    ]
    for logical_dtype, physical_dtype in cases:
        s = pl.Series("a", [None], logical_dtype)  # content doesn't matter
        s = s.append(s)
        expected = pl.Series("a", [None, None], physical_dtype)
        assert_series_equal(s.to_physical(), expected)
1757
1758
1759
def test_is_between_datetime() -> None:
    """`is_between` with datetime bounds, evaluated through an expression."""
    moments = pl.Series(
        "a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)]
    )
    lower = datetime(2020, 1, 1, 12, 0, 0)
    upper = datetime(2020, 1, 1, 23, 0, 0)

    # only on the expression api
    in_window = pl.col("*").is_between(lower, upper)
    result = moments.to_frame().with_columns(in_window).to_series()

    assert_series_equal(result, pl.Series("a", [False, True]))
1768
1769
1770
@pytest.mark.parametrize(
    "f",
    [
        "sin",
        "cos",
        "tan",
        "arcsin",
        "arccos",
        "arctan",
        "sinh",
        "cosh",
        "tanh",
        "arcsinh",
        "arccosh",
        "arctanh",
    ],
)
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
def test_trigonometric(f: str) -> None:
    """Each trigonometric method matches the NumPy function of the same name."""
    s = pl.Series("a", [0.0, math.pi, None, math.nan])

    # NumPy result on the exported array, then re-null the original null slots
    numpy_function = getattr(np, f)
    reference = pl.Series("a", numpy_function(s.to_numpy()))
    restore_nulls = pl.when(s.is_null()).then(None).otherwise(pl.col("a")).alias("a")
    expected = reference.to_frame().with_columns(restore_nulls).to_series()

    polars_method = getattr(s, f)
    assert_series_equal(polars_method(), expected)
1798
1799
1800
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
def test_trigonometric_cot() -> None:
    """`cot` is checked against hard-coded reference values."""
    # cotangent is not available in numpy...
    angles = pl.Series("a", [0.0, math.pi, None, math.nan])
    reference = pl.Series("a", [math.inf, -8.1656e15, None, math.nan])
    assert_series_equal(angles.cot(), reference)
1806
1807
1808
def test_trigonometric_invalid_input() -> None:
    """Trigonometric methods reject String and Date Series."""
    # String
    strings = pl.Series("a", ["1", "2", "3"])
    with pytest.raises(InvalidOperationError):
        strings.sin()

    # Date
    dates = pl.Series("a", [date(1990, 2, 28), date(2022, 7, 26)])
    with pytest.raises(InvalidOperationError):
        dates.cosh()
1818
1819
1820
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_product_ints(dtype: PolarsDataType) -> None:
    """`product` over integer dtypes, with and without nulls."""
    # (values, expected product)
    cases = [
        ([1, 2, 3], 6),
        ([1, 2, None], 2),
        ([None, 2, 3], 6),
    ]
    for values, expected_product in cases:
        series = pl.Series("a", values, dtype=dtype)
        assert series.product() == expected_product
1831
1832
1833
@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
def test_product_floats(dtype: PolarsDataType) -> None:
    """`product` over float dtypes: empty, all-null and NaN inputs."""
    # an empty or all-null Series is expected to give a product of 1
    for values in ([], [None, None]):
        series = pl.Series("a", values, dtype=dtype)
        assert series.product() == 1

    # a NaN among the values makes the product NaN
    with_nan = pl.Series("a", [3.0, None, float("nan")], dtype=dtype)
    assert math.isnan(with_nan.product())
1844
1845
1846
def test_ceil() -> None:
    """`ceil` rounds floats up to the next whole number."""
    values = pl.Series([1.8, 1.2, 3.0])
    rounded_up = pl.Series([2.0, 2.0, 3.0])
    assert_series_equal(values.ceil(), rounded_up)
1850
1851
1852
def test_duration_arithmetic() -> None:
    """Adding a duration to a datetime Series works from either side."""
    # apply some basic duration math to series
    s = pl.Series([datetime(2022, 1, 1, 10, 20, 30), datetime(2022, 1, 2, 20, 40, 50)])

    # the same offset as a polars duration expression and as a Python timedelta
    offsets = (
        pl.duration(days=5, microseconds=123456),
        timedelta(days=5, microseconds=123456),
    )
    expected_values = [
        datetime(2022, 1, 6, 10, 20, 30, 123456),
        datetime(2022, 1, 7, 20, 40, 50, 123456),
    ]

    for offset in offsets:
        series_first = pl.select((s + offset).alias("d_offset"))["d_offset"]
        offset_first = pl.select((offset + s).alias("d_offset"))["d_offset"]
        assert series_first.to_list() == expected_values
        assert_series_equal(series_first, offset_first)
1867
1868
1869
def test_mean_overflow() -> None:
    """The mean of a small-integer Series is not corrupted by overflow."""
    # 255 * 2**17 is far outside the Int16 range, so a sum accumulated in the
    # source dtype would wrap around before the division
    arr = np.array([255] * (1 << 17), dtype="int16")

    # NumPy reference value
    assert arr.mean() == 255.0

    # exercise polars itself, not only the NumPy reference
    assert pl.Series(arr).mean() == 255.0
1872
1873
1874
def test_sign() -> None:
    """`sign` for integer, float and Decimal Series; Date input is rejected."""
    # Integers
    ints = pl.Series("a", [-9, -0, 0, 4, None])
    assert_series_equal(ints.sign(), pl.Series("a", [-1, 0, 0, 1, None]))

    # Floats
    floats = pl.Series("a", [-9.0, -0.0, 0.0, 4.0, float("nan"), None])
    float_signs = pl.Series("a", [-1.0, 0.0, 0.0, 1.0, float("nan"), None])
    assert_series_equal(floats.sign(), float_signs)

    # Decimal
    s = pl.Series("a", [1, -1, 10, -10])
    for scale in [0, 1, 2, 3, 7, 16, 20, 30]:
        dtype = pl.Decimal(scale=scale)
        # taking the sign and casting to Decimal must commute
        assert_series_equal(s.sign().cast(dtype), s.cast(dtype).sign())

    # NOTE(review): reuses the `dtype` left bound by the loop above - confirm
    # this check is meant to run once, after the loop
    s = pl.Series("a", ["1.00", "20.00", "-1", "0", "-7"], dtype)
    decimal_signs = pl.Series("a", ["1", "1", "-1", "0", "-1"], dtype)
    assert_series_equal(s.sign(), decimal_signs)

    # Invalid input
    dates = pl.Series(
        "a", [date(1950, 2, 1), date(1970, 1, 1), date(2022, 12, 12), None]
    )
    with pytest.raises(InvalidOperationError):
        dates.sign()
1900
1901
1902
def test_exp() -> None:
    """`exp` matches reference values, keeps nulls and handles empty input."""
    values = pl.Series("a", [0.1, 0.01, None])
    reference = pl.Series("a", [1.1051709180756477, 1.010050167084168, None])
    assert_series_equal(values.exp(), reference)

    # test if we can run on empty series as well.
    empty = values[:0]
    assert empty.exp().to_list() == []
1908
1909
1910
def test_cumulative_eval() -> None:
    """Check `Series.cumulative_eval` with single and combined expressions."""
    values = pl.Series("values", [1, 2, 3, 4, 5])

    first = pl.element().first()
    last_squared = pl.element().last() ** 2

    checks = (
        # each expression evaluated on its own
        (first, [1, 1, 1, 1, 1]),
        (last_squared, [1, 4, 9, 16, 25]),
        # both expressions combined into one
        (first - last_squared, [0, -3, -8, -15, -24]),
    )
    for expr, expected in checks:
        assert_series_equal(
            values.cumulative_eval(expr), pl.Series("values", expected)
        )
1926
1927
1928
def test_first_last() -> None:
    """Check `first`/`last`, with and without `ignore_nulls`, after appends."""
    # the series is assembled by appending pieces (to ensure multiple chunks)
    head, *tail = (
        pl.Series("a", [None, None], dtype=pl.Int32),
        pl.Series("a", [None, 3, 4, None], dtype=pl.Int32),
        pl.Series("a", [None, None], dtype=pl.Int32),
    )
    combined = head
    for piece in tail:
        combined = combined.append(piece)

    # nulls lead and trail the data, so the plain accessors give None
    assert combined.first() is None
    assert combined.last() is None

    # skipping nulls exposes the outermost non-null values
    assert combined.first(ignore_nulls=True) == 3
    assert combined.last(ignore_nulls=True) == 4
1938
1939
1940
def test_clip() -> None:
    """Check `Series.clip` limits values to the bounds and keeps nulls."""
    values = pl.Series("foo", [-50, 5, None, 50])
    clipped = values.clip(1, 10)
    assert clipped.to_list() == [1, 5, None, 10]
1943
1944
1945
def test_repr() -> None:
    """Check the repr of a Series, and of a Series subclass, for key content."""
    data = [1001, 2002, 3003]

    class XSeries(pl.Series):
        """Custom Series class."""

    plain = pl.Series("ints", data)
    plain_repr = repr(plain)
    for fragment in (
        "shape: (3,)",
        "Series: 'ints' [i64]",
        *map(str, plain.to_list()),
    ):
        assert fragment in plain_repr

    # the custom class name must be reflected in the repr output
    custom = XSeries("ints", data)
    custom_repr = repr(custom)
    for fragment in (
        "shape: (3,)",
        "XSeries: 'ints' [i64]",
        "1001",
        *map(str, custom.to_list()),
    ):
        assert fragment in custom_repr
1966
1967
1968
def test_repr_html(df: pl.DataFrame) -> None:
    """Smoke-test `Series._repr_html_`.

    The `df` parameter (presumably a fixture) is not used by the body.
    """
    series = pl.Series("misc", [123, 456, 789])
    rendered = series._repr_html_()
    # no panic/error, and the markup appears to contain a table
    assert "<table" in rendered
1972
1973
1974
@pytest.mark.parametrize(
    ("value", "time_unit", "exp", "exp_type"),
    [
        (13285, "d", date(2006, 5, 17), pl.Date),
        (1147880044, "s", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime("us")),
        (
            1147880044 * 10**3,
            "ms",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("us"),
        ),
        (
            1147880044 * 10**6,
            "us",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("us"),
        ),
        (
            1147880044 * 10**9,
            "ns",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("ns"),
        ),
    ],
)
def test_from_epoch_expr(
    value: int,
    time_unit: EpochTimeUnit,
    exp: date | datetime,
    exp_type: PolarsDataType,
) -> None:
    """Check `pl.from_epoch` on a Series for the units d, s, ms, us and ns.

    Each case pairs an epoch offset with its unit and the expected temporal
    value and dtype; the trailing null is expected to stay null.
    """
    epochs = pl.Series("timestamp", [value, None])
    expected = pl.Series("timestamp", [exp, None]).cast(exp_type)
    assert_series_equal(pl.from_epoch(epochs, time_unit=time_unit), expected)
2020
2021
2022
def test_get_chunks() -> None:
    """Check `get_chunks` gives back the inputs of a non-rechunked concat."""
    parts = [pl.Series("a", [1, 2]), pl.Series("a", [3, 4])]
    chunks = pl.concat(parts, rechunk=False).get_chunks()
    for position, part in enumerate(parts):
        assert_series_equal(chunks[position], part)
2028
2029
2030
def test_null_comparisons() -> None:
    """Check that `==` and `!=` yield null wherever an operand is null."""
    values = pl.Series("s", [None, "str", "a"])
    shifted = values.shift()
    # two of the three row pairs involve a null on at least one side
    for outcome in (shifted == values, shifted != values):
        assert outcome.null_count() == 2
2034
2035
2036
def test_min_max_agg_on_str() -> None:
    """Check `min` and `max` on a series of strings."""
    letters = pl.Series(["b", "a", "x"])
    smallest, largest = letters.min(), letters.max()
    assert smallest == "a"
    assert largest == "x"
2040
2041
2042
def test_min_max_full_nan_15058() -> None:
    """Check that `min` and `max` of an all-NaN series are both NaN."""
    all_nan = pl.Series([float("nan")] * 2)
    extremes = [all_nan.min(), all_nan.max()]
    for value in extremes:
        # a NaN compares unequal to itself
        assert value != value
2045
2046
2047
def test_is_between() -> None:
    """Check `Series.is_between` for the `closed` options and string data."""
    nums = pl.Series("num", [1, 2, None, 4, 5])

    # no `closed` argument: both endpoints match
    default_mask = nums.is_between(2, 4)
    assert default_mask.to_list() == [False, True, None, True, False]

    # only the lower endpoint matches
    left_mask = nums.is_between(2, 4, closed="left")
    assert left_mask.to_list() == [False, True, None, False, False]

    # only the upper endpoint matches
    right_mask = nums.is_between(2, 4, closed="right")
    assert right_mask.to_list() == [False, False, None, True, False]

    # bounds supplied as expressions
    both_mask = nums.is_between(pl.lit(2) / 2, pl.lit(4) * 2, closed="both")
    assert both_mask.to_list() == [True, True, None, True, True]

    # string data with string bounds
    letters = pl.Series("s", ["a", "b", "c", "d", "e"])
    letter_mask = letters.is_between("b", "d")
    assert letter_mask.to_list() == [False, True, True, True, False]
2086
2087
2088
@pytest.mark.parametrize(
    ("dtype", "lower", "upper"),
    [
        (pl.Int8, -(2**7), 2**7 - 1),
        (pl.UInt8, 0, 2**8 - 1),
        (pl.Int16, -(2**15), 2**15 - 1),
        (pl.UInt16, 0, 2**16 - 1),
        (pl.Int32, -(2**31), 2**31 - 1),
        (pl.UInt32, 0, 2**32 - 1),
        (pl.Int64, -(2**63), 2**63 - 1),
        (pl.UInt64, 0, 2**64 - 1),
        (pl.Float32, float("-inf"), float("inf")),
        (pl.Float64, float("-inf"), float("inf")),
    ],
)
def test_upper_lower_bounds(
    dtype: PolarsDataType, upper: int | float, lower: int | float
) -> None:
    """Check `lower_bound`/`upper_bound` of an empty series for each dtype.

    The signature lists `upper` before `lower` while the parametrize names
    list `lower` first; the values are matched to arguments by name.
    """
    empty = pl.Series("s", dtype=dtype)
    observed_lower = empty.lower_bound().item()
    observed_upper = empty.upper_bound().item()
    assert observed_lower == lower
    assert observed_upper == upper
2109
2110
2111
def test_numpy_series_arithmetic() -> None:
    """Check arithmetic between a numpy array and a Series in both orders."""
    ints = pl.Series(values=[1, 2])
    floats = np.array([3.0, 4.0])

    def f64(*values: float) -> pl.Series:
        # expected results are always unnamed Float64 series
        return pl.Series(list(values), dtype=pl.Float64)

    # addition
    assert_series_equal(floats + ints, f64(4.0, 6.0))  # type: ignore[arg-type]
    assert_series_equal(ints + floats, f64(4.0, 6.0))

    # subtraction
    array_minus_series = cast(
        "pl.Series", floats - ints
    )  # py37 is different vs py311 on this one
    assert_series_equal(array_minus_series, f64(2.0, 2.0))
    assert_series_equal(ints - floats, f64(-2.0, -2.0))

    # multiplication
    assert_series_equal(floats * ints, f64(3.0, 8.0))  # type: ignore[arg-type]
    assert_series_equal(ints * floats, f64(3.0, 8.0))

    # true division
    assert_series_equal(floats / ints, f64(3.0, 2.0))  # type: ignore[arg-type]
    assert_series_equal(ints / floats, f64(1 / 3, 0.5))

    # exponentiation
    assert_series_equal(floats**ints, f64(3.0, 16.0))  # type: ignore[arg-type]
    assert_series_equal(ints**floats, f64(1.0, 16.0))  # type: ignore[arg-type]
2147
2148
2149
def test_from_epoch_seq_input() -> None:
    """Check `pl.from_epoch` accepts a plain list and returns a Series."""
    converted = pl.from_epoch([1147880044])
    assert_series_equal(
        converted,
        pl.Series([datetime(2006, 5, 17, 15, 34, 4)]),
    )
2154
2155
2156
def test_symmetry_for_max_in_names() -> None:
    """Check `s - s.max()` and `s.max() - s` both keep the series name."""
    candidates = (
        pl.Series("a", [1]),  # int
        pl.Series("a", [1.0]),  # float
        pl.Series("a", [1], dtype=pl.Duration("ns")),  # duration
        pl.Series("a", [1], dtype=pl.Datetime("ns")),  # datetime
    )
    for s in candidates:
        assert (s - s.max()).name == (s.max() - s).name == s.name  # type: ignore[union-attr]

    # TODO: time arithmetic support?
    # a = pl.Series("a", [1], dtype=pl.Time)
    # assert (a - a.max()).name == (a.max() - a).name == a.name
2173
2174
2175
def test_series_getitem_out_of_bounds_positive() -> None:
    """Check that indexing past the end raises a descriptive IndexError."""
    two_items = pl.Series([1, 2])
    message = "index 10 is out of bounds for sequence of length 2"
    with pytest.raises(IndexError, match=message):
        two_items[10]
2181
2182
2183
def test_series_getitem_out_of_bounds_negative() -> None:
    """Check that a too-negative index raises a descriptive IndexError."""
    two_items = pl.Series([1, 2])
    message = "index -10 is out of bounds for sequence of length 2"
    with pytest.raises(IndexError, match=message):
        two_items[-10]
2189
2190
2191
def test_series_cmp_fast_paths() -> None:
    """Check a length-1 null series compared with a length-2 series.

    Both `!=` and `==` are expected to give two nulls, for Int32, String
    and Boolean data.
    """
    # (dtype, right-hand values for `!=`, right-hand values for `==`)
    cases = (
        (pl.Int32, [1, 2], [1, 2]),
        (pl.String, ["a", "b"], ["a", "b"]),
        (pl.Boolean, [True, False], [False, False]),
    )
    for dtype, ne_values, eq_values in cases:
        single_null = pl.Series([None], dtype=dtype)

        not_equal = single_null != pl.Series(ne_values, dtype=dtype)
        assert not_equal.to_list() == [None, None]

        equal = single_null == pl.Series(eq_values, dtype=dtype)
        assert equal.to_list() == [None, None]
2214
2215
2216
def test_comp_series_with_str_13123() -> None:
    """Check comparing a String series containing a null against a str."""
    values = pl.Series(["1", "2", None])

    # the plain operators give null for the null row
    differs = values != "1"
    assert_series_equal(differs, pl.Series([False, True, None]))
    matches = values == "1"
    assert_series_equal(matches, pl.Series([True, False, None]))

    # the *_missing variants give a non-null boolean for the null row
    matches_missing = values.eq_missing("1")
    assert_series_equal(matches_missing, pl.Series([True, False, False]))
    differs_missing = values.ne_missing("1")
    assert_series_equal(differs_missing, pl.Series([False, True, True]))
2222
2223
2224
@pytest.mark.parametrize(
    ("data", "single", "multiple", "single_expected", "multiple_expected"),
    [
        ([1, 2, 3], 1, [2, 4], 0, [1, 3]),
        (["a", "b", "c"], "d", ["a", "d"], 3, [0, 3]),
        ([b"a", b"b", b"c"], b"d", [b"a", b"d"], 3, [0, 3]),
        (
            [date(2022, 1, 2), date(2023, 4, 1)],
            date(2022, 1, 1),
            [date(1999, 10, 1), date(2024, 1, 1)],
            0,
            [0, 2],
        ),
        ([1, 2, 3], 1, np.array([2, 4]), 0, [1, 3]),  # test np array.
    ],
)
def test_search_sorted(
    data: list[Any],
    single: Any,
    multiple: list[Any],
    single_expected: Any,
    multiple_expected: list[Any],
) -> None:
    """Check `Series.search_sorted` with a scalar and with several values.

    A scalar lookup is compared directly against `single_expected`; a
    multi-value lookup is compared against a series of the index dtype.
    """
    haystack = pl.Series(data)

    # scalar lookup
    assert haystack.search_sorted(single) == single_expected

    # multi-value lookup
    expected_positions = pl.Series(multiple_expected, dtype=pl.get_index_type())
    assert_series_equal(haystack.search_sorted(multiple), expected_positions)
2255
2256
2257
def test_series_from_pandas_with_dtype() -> None:
    """Check building a Series from pandas data with an explicit dtype."""
    expected = pl.Series("foo", [1, 2, 3], dtype=pl.Int8)

    # values that fit the requested dtype, from a default and an "Int16" source
    for source in (pd.Series([1, 2, 3]), pd.Series([1, 2, 3], dtype="Int16")):
        assert_series_equal(pl.Series("foo", source, pl.Int8), expected)

    # a negative value does not fit UInt8: the default call raises, while
    # strict=False gives a null in its place
    negatives = (pd.Series([-1, 2, 3]), pd.Series([-1, 2, 3], dtype="Int8"))
    for source in negatives:
        with pytest.raises(InvalidOperationError, match="conversion from"):
            pl.Series("foo", source, pl.UInt8)

        lenient = pl.Series("foo", source, pl.UInt8, strict=False)
        assert lenient.to_list() == [None, 2, 3]
        assert lenient.dtype == pl.UInt8
2275
2276
2277
def test_series_from_pyarrow_with_dtype() -> None:
    """Check building a Series from a pyarrow array with an explicit dtype."""
    arrow_values = pa.array([-1, 2, 3])

    # every value fits Int8
    as_int8 = pl.Series("foo", arrow_values, pl.Int8)
    assert_series_equal(as_int8, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))

    # -1 does not fit UInt8: the default call raises
    with pytest.raises(InvalidOperationError, match="conversion from"):
        pl.Series("foo", arrow_values, pl.UInt8)

    # strict=False gives a null in its place
    lenient = pl.Series("foo", arrow_values, dtype=pl.UInt8, strict=False)
    assert lenient.to_list() == [None, 2, 3]
    assert lenient.dtype == pl.UInt8
2287
2288
2289
def test_series_from_numpy_with_dtype() -> None:
    """Check building a Series from a numpy array with an explicit dtype."""
    numpy_values = np.array([-1, 2, 3])

    # every value fits Int8
    as_int8 = pl.Series("foo", numpy_values, pl.Int8)
    assert_series_equal(as_int8, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))

    # -1 does not fit UInt8: the default call raises
    with pytest.raises(InvalidOperationError, match="conversion from"):
        pl.Series("foo", numpy_values, pl.UInt8)

    # strict=False gives a null in its place
    lenient = pl.Series("foo", numpy_values, dtype=pl.UInt8, strict=False)
    assert lenient.to_list() == [None, 2, 3]
    assert lenient.dtype == pl.UInt8
2299
2300
2301
def test_raise_invalid_is_between() -> None:
    """Check `is_between` on an integer literal with string bounds raises."""
    lower, upper = pl.lit("11"), pl.lit("33")
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.select(pl.lit(2).is_between(lower, upper))
2304
2305
2306
def test_construction_large_nested_u64_17231() -> None:
    """Check a nested UInt64 value above the Int64 range round-trips."""
    # NOTE: local re-import; the module also imports polars at the top
    import polars as pl

    # 9223372036854775808 == 2**63, one past the largest signed 64-bit value
    rows = [{"f0": [9223372036854775808]}]
    schema = pl.Struct({"f0": pl.List(pl.UInt64)})
    round_tripped = pl.Series(rows, dtype=schema).to_list()
    assert round_tripped == rows
2312
2313
2314
def test_repeat_by() -> None:
    """Check `Series.repeat_by` turns each value into a list of repeats."""
    repeated = pl.Series("a", [1, 2]).repeat_by(2)
    wanted = pl.Series("a", [[1, 1], [2, 2]])
    actual_frame = pl.select(a=repeated)
    wanted_frame = pl.select(a=wanted)
    assert actual_frame.equals(wanted_frame)
2318
2319
2320
def test_is_close() -> None:
    """Check `Series.is_close` with `abs_tol` on finite, infinite and NaN data."""
    inf, nan = float("inf"), float("nan")
    # one row per element: (left value, right value, expected verdict)
    rows = [
        (1.0, 1.3, True),  # difference 0.3 is within abs_tol
        (1.0, 1.7, False),  # difference 0.7 exceeds abs_tol
        (-inf, -inf, True),
        (inf, inf, True),
        (inf, -inf, False),
        (inf, 1.0, False),
        (nan, nan, False),
    ]
    left_values, right_values, expected = (list(col) for col in zip(*rows))

    a = pl.Series("a", left_values)
    b = pl.Series("b", right_values)
    assert a.is_close(b, abs_tol=0.5).to_list() == expected
2345
2346
2347
def test_is_close_literal() -> None:
    """Check `Series.is_close` against a scalar with default tolerances."""
    values = pl.Series("a", [1.1, 1.2, 1.3, 1.4, float("inf"), float("nan")])
    verdicts = values.is_close(1.2).to_list()
    # only the element equal to the literal is reported as close
    assert verdicts == [False, True, False, False, False, False]
2350
2351
2352
def test_is_close_nans_equal() -> None:
    """Check `nans_equal=True` makes a NaN pair count as close."""
    nan = float("nan")
    left = pl.Series("a", [1.0, nan])
    right = pl.Series("b", [2.0, nan])
    verdicts = left.is_close(right, nans_equal=True).to_list()
    assert verdicts == [False, True]
2356
2357
2358
def test_is_close_invalid_abs_tol() -> None:
    """Check a negative `abs_tol` is rejected with a ComputeError."""
    negative_tolerance = -1.0
    with pytest.raises(pl.exceptions.ComputeError):
        pl.select(pl.lit(1.0).is_close(1, abs_tol=negative_tolerance))
2361
2362
2363
def test_is_close_invalid_rel_tol() -> None:
    """Check a negative `rel_tol` is rejected with a ComputeError."""
    negative_tolerance = -1.0
    with pytest.raises(pl.exceptions.ComputeError):
        pl.select(pl.lit(1.0).is_close(1, rel_tol=negative_tolerance))
2366
2367
2368
def test_comparisons_structs_raise() -> None:
    """Check `==` on a Struct series raises NotImplementedError for any rhs."""
    structs = pl.Series([{"x": 1}, {"x": 2}, {"x": 3}])
    pattern = r"Series of type Struct\(\{'x': Int64\}\) does not have eq operator"

    # strings, an integer and a dict are all expected to be rejected
    for rhs in ("", " ", 5, {"x": 1}):
        with pytest.raises(NotImplementedError, match=pattern):
            structs == rhs  # noqa: B015
2377
2378