# Source page: blob/main/py-polars/tests/unit/series/test_series.py (8440 views)
from __future__ import annotations12import math3from datetime import date, datetime, time, timedelta4from decimal import Decimal5from typing import TYPE_CHECKING, Any, cast6from zoneinfo import ZoneInfo78import numpy as np9import pandas as pd10import pyarrow as pa11import pytest1213import polars as pl14from polars._utils.construction import iterable_to_pyseries15from polars.datatypes import (16Datetime,17Field,18Float64,19Int32,20Int64,21Time,22UInt32,23UInt64,24Unknown,25)26from polars.exceptions import (27DuplicateError,28InvalidOperationError,29PolarsInefficientMapWarning,30ShapeError,31)32from polars.testing import assert_frame_equal, assert_series_equal33from tests.unit.conftest import FLOAT_DTYPES, INTEGER_DTYPES34from tests.unit.utils.pycapsule_utils import PyCapsuleStreamHolder3536if TYPE_CHECKING:37from collections.abc import Iterator3839from polars._typing import EpochTimeUnit, PolarsDataType, TimeUnit40from tests.conftest import PlMonkeyPatch414243def test_cum_agg() -> None:44# confirm that known series give expected results45s = pl.Series("a", [1, 2, 3, 2])46assert_series_equal(s.cum_sum(), pl.Series("a", [1, 3, 6, 8]))47assert_series_equal(s.cum_min(), pl.Series("a", [1, 1, 1, 1]))48assert_series_equal(s.cum_max(), pl.Series("a", [1, 2, 3, 3]))49assert_series_equal(s.cum_prod(), pl.Series("a", [1, 2, 6, 12]))505152def test_cum_agg_with_nulls() -> None:53# confirm that known series give expected results54s = pl.Series("a", [None, 2, None, 7, 8, None])55assert_series_equal(s.cum_sum(), pl.Series("a", [None, 2, None, 9, 17, None]))56assert_series_equal(s.cum_min(), pl.Series("a", [None, 2, None, 2, 2, None]))57assert_series_equal(s.cum_max(), pl.Series("a", [None, 2, None, 7, 8, None]))58assert_series_equal(s.cum_prod(), pl.Series("a", [None, 2, None, 14, 112, None]))596061def test_cum_agg_with_infs() -> None:62# confirm that inf values are handled correctly63s = pl.Series([float("inf"), 0.0, 1.0])64assert_series_equal(s.cum_min(), 
pl.Series([float("inf"), 0.0, 0.0]))6566s = pl.Series([float("-inf"), 0.0, 1.0])67assert_series_equal(s.cum_max(), pl.Series([float("-inf"), 0.0, 1.0]))686970def test_cum_min_max_bool() -> None:71s = pl.Series("a", [None, True, True, None, False, None, True, False, False, None])72assert_series_equal(s.cum_min().cast(pl.Int32), s.cast(pl.Int32).cum_min())73assert_series_equal(s.cum_max().cast(pl.Int32), s.cast(pl.Int32).cum_max())74assert_series_equal(75s.cum_min(reverse=True).cast(pl.Int32), s.cast(pl.Int32).cum_min(reverse=True)76)77assert_series_equal(78s.cum_max(reverse=True).cast(pl.Int32), s.cast(pl.Int32).cum_max(reverse=True)79)808182def test_init_inputs(plmonkeypatch: PlMonkeyPatch) -> None:83nan = float("nan")84# Good inputs85pl.Series("a", [1, 2])86pl.Series("a", values=[1, 2])87pl.Series(name="a", values=[1, 2])88pl.Series(values=[1, 2], name="a")8990assert pl.Series([1, 2]).dtype == pl.Int6491assert pl.Series(values=[1, 2]).dtype == pl.Int6492assert pl.Series("a").dtype == pl.Null # Null dtype used in case of no data93assert pl.Series().dtype == pl.Null94assert pl.Series([]).dtype == pl.Null95assert (96pl.Series([None, None, None]).dtype == pl.Null97) # f32 type used for list with only None98assert pl.Series(values=[True, False]).dtype == pl.Boolean99assert pl.Series(values=np.array([True, False])).dtype == pl.Boolean100assert pl.Series(values=np.array(["foo", "bar"])).dtype == pl.String101assert pl.Series(values=["foo", "bar"]).dtype == pl.String102assert pl.Series("a", [pl.Series([1, 2, 4]), pl.Series([3, 2, 1])]).dtype == pl.List103assert pl.Series("a", [10000, 20000, 30000], dtype=pl.Time).dtype == pl.Time104105# 2d numpy array and/or list of 1d numpy arrays106for res in (107pl.Series(108name="a",109values=np.array([[1, 2], [3, nan]], dtype=np.float32),110nan_to_null=True,111),112pl.Series(113name="a",114values=[115np.array([1, 2], dtype=np.float32),116np.array([3, nan], 
dtype=np.float32),117],118nan_to_null=True,119),120pl.Series(121name="a",122values=(123np.ndarray((2,), np.float32, np.array([1, 2], dtype=np.float32)),124np.ndarray((2,), np.float32, np.array([3, nan], dtype=np.float32)),125),126nan_to_null=True,127),128):129assert res.dtype == pl.Array(pl.Float32, shape=2)130assert res[0].to_list() == [1.0, 2.0]131assert res[1].to_list() == [3.0, None]132133# numpy from arange, with/without dtype134two_ints = np.arange(2, dtype=np.int64)135three_ints = np.arange(3, dtype=np.int64)136for res in (137pl.Series("a", [two_ints, three_ints]),138pl.Series("a", [two_ints, three_ints], dtype=pl.List(pl.Int64)),139):140assert res.dtype == pl.List(pl.Int64)141assert res.to_list() == [[0, 1], [0, 1, 2]]142143assert pl.Series(144values=np.array([["foo", "bar"], ["foo2", "bar2"]])145).dtype == pl.Array(pl.String, shape=2)146147# lists148assert pl.Series("a", [[1, 2], [3, 4]]).dtype == pl.List(pl.Int64)149150# conversion of Date to Datetime151s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime)152assert s.to_list() == [datetime(2023, 1, 1), datetime(2023, 1, 2)]153assert Datetime == s.dtype154assert s.dtype.time_unit == "us" # type: ignore[attr-defined]155assert s.dtype.time_zone is None # type: ignore[attr-defined]156157# conversion of Date to Datetime with specified timezone and units158tu: TimeUnit = "ms"159tz = "America/Argentina/Rio_Gallegos"160s = pl.Series(161[date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu)162).dt.replace_time_zone(tz)163d1 = datetime(2023, 1, 1, 0, 0, 0, 0, ZoneInfo(tz))164d2 = datetime(2023, 1, 2, 0, 0, 0, 0, ZoneInfo(tz))165assert s.to_list() == [d1, d2]166assert Datetime == s.dtype167assert s.dtype.time_unit == tu # type: ignore[attr-defined]168assert s.dtype.time_zone == tz # type: ignore[attr-defined]169170# datetime64: check timeunit (auto-detect, implicit/explicit) and NaT171d64 = pd.date_range(date(2021, 8, 1), date(2021, 8, 3)).values.astype(172"datetime64[ns]"173)174d64[1] = 
None175176expected = [datetime(2021, 8, 1, 0), None, datetime(2021, 8, 3, 0)]177for dtype in (None, Datetime, Datetime("ns")):178s = pl.Series("dates", d64, dtype)179assert s.to_list() == expected180assert Datetime == s.dtype181assert s.dtype.time_unit == "ns" # type: ignore[attr-defined]182183s = pl.Series(values=d64.astype("<M8[ms]"))184assert s.dtype.time_unit == "ms" # type: ignore[attr-defined]185assert expected == s.to_list()186187# pandas188assert pl.Series(pd.Series([1, 2])).dtype == pl.Int64189190# Bad inputs191with pytest.raises(TypeError):192pl.Series([1, 2, 3], [1, 2, 3])193with pytest.raises(TypeError):194pl.Series({"a": [1, 2, 3]})195with pytest.raises(OverflowError):196pl.Series("bigint", [2**128])197198# numpy not available199plmonkeypatch.setattr(pl.series.series, "_check_for_numpy", lambda x: False)200with pytest.raises(TypeError):201pl.DataFrame(np.array([1, 2, 3]), schema=["a"])202203204def test_init_structured_objects() -> None:205# validate init from dataclass, namedtuple, and pydantic model objects206from typing import NamedTuple207208from polars._dependencies import dataclasses, pydantic209210@dataclasses.dataclass211class TeaShipmentDC:212exporter: str213importer: str214product: str215tonnes: int | None216217class TeaShipmentNT(NamedTuple):218exporter: str219importer: str220product: str221tonnes: None | int222223class TeaShipmentPD(pydantic.BaseModel):224exporter: str225importer: str226product: str227tonnes: int228229for Tea in (TeaShipmentDC, TeaShipmentNT, TeaShipmentPD):230t0 = Tea(exporter="Sri Lanka", importer="USA", product="Ceylon", tonnes=10)231t1 = Tea(exporter="India", importer="UK", product="Darjeeling", tonnes=25)232t2 = Tea(exporter="China", importer="UK", product="Keemum", tonnes=40)233234s = pl.Series("t", [t0, t1, t2])235236assert isinstance(s, pl.Series)237assert s.dtype.fields == [ # type: ignore[attr-defined]238Field("exporter", pl.String),239Field("importer", pl.String),240Field("product", pl.String),241Field("tonnes", 
pl.Int64),242]243assert s.to_list() == [244{245"exporter": "Sri Lanka",246"importer": "USA",247"product": "Ceylon",248"tonnes": 10,249},250{251"exporter": "India",252"importer": "UK",253"product": "Darjeeling",254"tonnes": 25,255},256{257"exporter": "China",258"importer": "UK",259"product": "Keemum",260"tonnes": 40,261},262]263assert_frame_equal(s.to_frame(), pl.DataFrame({"t": [t0, t1, t2]}))264265266def test_to_frame() -> None:267s1 = pl.Series([1, 2])268s2 = pl.Series("s", [1, 2])269270df1 = s1.to_frame()271df2 = s2.to_frame()272df3 = s1.to_frame("xyz")273df4 = s2.to_frame("xyz")274275for df, name in ((df1, ""), (df2, "s"), (df3, "xyz"), (df4, "xyz")):276assert isinstance(df, pl.DataFrame)277assert df.rows() == [(1,), (2,)]278assert df.columns == [name]279280# note: the empty string IS technically a valid column name281assert s2.to_frame("").columns == [""]282assert s2.name == "s"283284285def test_bitwise_ops() -> None:286a = pl.Series([True, False, True])287b = pl.Series([False, True, True])288assert_series_equal((a & b), pl.Series([False, False, True]))289assert_series_equal((a | b), pl.Series([True, True, True]))290assert_series_equal((a ^ b), pl.Series([True, True, False]))291assert_series_equal((~a), pl.Series([False, True, False]))292293# rand/rxor/ror we trigger by casting the left hand to a list here in the test294# Note that the type annotations only allow Series to be passed in, but there is295# specific code to deal with non-Series inputs.296assert_series_equal(297(True & a),298pl.Series([True, False, True]),299)300assert_series_equal(301(True | a),302pl.Series([True, True, True]),303)304assert_series_equal(305(True ^ a),306pl.Series([False, True, False]),307)308309310def test_bitwise_floats_invert() -> None:311s = pl.Series([2.0, 3.0, 0.0])312313with pytest.raises(InvalidOperationError):314~s315316317def test_equality() -> None:318a = pl.Series("a", [1, 2])319b = a320321cmp = a == b322assert isinstance(cmp, pl.Series)323assert cmp.sum() == 2324assert 
(a != b).sum() == 0325assert (a >= b).sum() == 2326assert (a <= b).sum() == 2327assert (a > b).sum() == 0328assert (a < b).sum() == 0329assert a.sum() == 3330assert_series_equal(a, b)331332a = pl.Series("name", ["ham", "foo", "bar"])333assert_series_equal((a == "ham"), pl.Series("name", [True, False, False]))334335a = pl.Series("name", [[1], [1, 2], [2, 3]])336assert_series_equal((a == [1]), pl.Series("name", [True, False, False]))337338339def test_agg() -> None:340series = pl.Series("a", [1, 2])341assert series.mean() == 1.5342assert series.min() == 1343assert series.max() == 2344345346def test_date_agg() -> None:347series = pl.Series(348[349date(2022, 8, 2),350date(2096, 8, 1),351date(9009, 9, 9),352],353dtype=pl.Date,354)355assert series.min() == date(2022, 8, 2)356assert series.max() == date(9009, 9, 9)357358359@pytest.mark.parametrize(360("s", "min", "max"),361[362(pl.Series(["c", "b", "a"], dtype=pl.Categorical()), "a", "c"),363(pl.Series([None, "a", "c", "b"], dtype=pl.Categorical()), "a", "c"),364(pl.Series([], dtype=pl.Categorical()), None, None),365(pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a"])), "c", "a"),366(pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a", "d"])), "c", "a"),367],368)369def test_categorical_agg(s: pl.Series, min: str | None, max: str | None) -> None:370assert s.min() == min371assert s.max() == max372373374def test_add_string() -> None:375s = pl.Series(["hello", "weird"])376result = s + " world"377print(result)378assert_series_equal(result, pl.Series(["hello world", "weird world"]))379380result = "pfx:" + s381assert_series_equal(result, pl.Series("literal", ["pfx:hello", "pfx:weird"]))382383384@pytest.mark.parametrize(385("data", "expected_dtype"),386[387(100, pl.Int64),388(8.5, pl.Float64),389("서울특별시", pl.String),390(date.today(), pl.Date),391(datetime.now(), pl.Datetime("us")),392(time(23, 59, 59), pl.Time),393(timedelta(hours=7, seconds=123), pl.Duration("us")),394],395)396def test_unknown_dtype(data: Any, 
expected_dtype: PolarsDataType) -> None:397# if given 'Unknown', should be able to infer the correct dtype398s = pl.Series([data], dtype=Unknown)399assert s.dtype == expected_dtype400assert s.to_list() == [data]401402403def test_various() -> None:404a = pl.Series("a", [1, 2])405assert a.is_null().sum() == 0406assert a.name == "a"407408a = a.rename("b")409assert a.name == "b"410assert a.len() == 2411assert len(a) == 2412413a.append(a.clone())414assert_series_equal(a, pl.Series("b", [1, 2, 1, 2]))415416a = pl.Series("a", range(20))417assert a.head(5).len() == 5418assert a.tail(5).len() == 5419assert (a.head(5) != a.tail(5)).all()420421a = pl.Series("a", [2, 1, 4])422a.sort(in_place=True)423assert_series_equal(a, pl.Series("a", [1, 2, 4]))424a = pl.Series("a", [2, 1, 1, 4, 4, 4])425assert_series_equal(426a.arg_unique(), pl.Series("a", [0, 1, 3], dtype=pl.get_index_type())427)428429assert_series_equal(a.gather([2, 3]), pl.Series("a", [1, 4]))430431432def test_series_dtype_is() -> None:433s = pl.Series("s", [1, 2, 3])434435assert s.dtype.is_numeric()436assert s.dtype.is_integer()437assert s.dtype.is_signed_integer()438assert not s.dtype.is_unsigned_integer()439assert (s * 0.99).dtype.is_float()440441s = pl.Series("s", [1, 2, 3], dtype=pl.UInt8)442assert s.dtype.is_numeric()443assert s.dtype.is_integer()444assert not s.dtype.is_signed_integer()445assert s.dtype.is_unsigned_integer()446447s = pl.Series("bool", [True, None, False])448assert not s.dtype.is_numeric()449450s = pl.Series("s", ["testing..."])451assert s.dtype == pl.String452assert s.dtype != pl.Boolean453454s = pl.Series("s", [], dtype=pl.Decimal(20, 15))455assert not s.dtype.is_float()456assert s.dtype.is_numeric()457assert s.is_empty()458459s = pl.Series("s", [], dtype=pl.Datetime("ms", time_zone="UTC"))460assert s.dtype.is_temporal()461462463def test_series_head_tail_limit() -> None:464s = pl.Series(range(10))465466assert_series_equal(s.head(5), pl.Series(range(5)))467assert_series_equal(s.limit(5), 
s.head(5))468assert_series_equal(s.tail(5), pl.Series(range(5, 10)))469470# check if it doesn't fail when out of bounds471assert s.head(100).len() == 10472assert s.limit(100).len() == 10473assert s.tail(100).len() == 10474475# negative values476assert_series_equal(s.head(-7), pl.Series(range(3)))477assert s.head(-2).len() == 8478assert_series_equal(s.tail(-8), pl.Series(range(8, 10)))479assert s.head(-6).len() == 4480481# negative values out of bounds482assert s.head(-12).len() == 0483assert s.limit(-12).len() == 0484assert s.tail(-12).len() == 0485486487def test_filter_ops() -> None:488a = pl.Series("a", range(20))489assert a.filter(a > 1).len() == 18490assert a.filter(a < 1).len() == 1491assert a.filter(a <= 1).len() == 2492assert a.filter(a >= 1).len() == 19493assert a.filter(a == 1).len() == 1494assert a.filter(a != 1).len() == 19495496497def test_cast() -> None:498a = pl.Series("a", range(20))499500assert a.cast(pl.Float32).dtype == pl.Float32501assert a.cast(pl.Float64).dtype == pl.Float64502assert a.cast(pl.Int32).dtype == pl.Int32503assert a.cast(pl.UInt32).dtype == pl.UInt32504assert a.cast(pl.Datetime).dtype == pl.Datetime505assert a.cast(pl.Date).dtype == pl.Date506507# display failed values, GH#4706508with pytest.raises(InvalidOperationError, match="foobar"):509pl.Series(["1", "2", "3", "4", "foobar"]).cast(int)510511512@pytest.mark.parametrize(513"test_data",514[515[1, None, 2],516["abc", None, "xyz"],517[None, datetime.now()],518[[1, 2], [3, 4], None],519],520)521def test_to_pandas(test_data: list[Any]) -> None:522a = pl.Series("s", test_data)523b = a.to_pandas()524525assert a.name == b.name526assert b.isnull().sum() == 1527528vals_b: list[Any]529if a.dtype == pl.List:530vals_b = [(None if x is None else x.tolist()) for x in b]531else:532vals_b = b.replace({np.nan: None}).values.tolist() # type: ignore[dict-item]533534assert vals_b == test_data535536try:537c = a.to_pandas(use_pyarrow_extension_array=True)538assert a.name == c.name539assert 
c.isnull().sum() == 1540vals_c = [None if x is pd.NA else x for x in c.tolist()]541assert vals_c == test_data542except ModuleNotFoundError:543# Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.544pass545546547def test_series_to_list() -> None:548s = pl.Series("a", range(20))549result = s.to_list()550assert isinstance(result, list)551assert len(result) == 20552553a = pl.Series("a", [1, None, 2])554assert a.null_count() == 1555assert a.to_list() == [1, None, 2]556557558@pytest.mark.may_fail_cloud # reason: list.to_struct is a eager operation559def test_to_struct() -> None:560s = pl.Series("nums", ["12 34", "56 78", "90 00"]).str.extract_all(r"\d+")561562assert s.list.to_struct().struct.fields == ["field_0", "field_1"]563assert s.list.to_struct(fields=lambda idx: f"n{idx:02}").struct.fields == [564"n00",565"n01",566]567assert_frame_equal(568s.list.to_struct(fields=["one", "two"]).struct.unnest(),569pl.DataFrame({"one": ["12", "56", "90"], "two": ["34", "78", "00"]}),570)571572573def test_to_struct_empty() -> None:574df = pl.DataFrame({"y": [[], [], []]}, schema={"y": pl.List(pl.Int64)})575empty_df = df.select(pl.col("y").list.to_struct(fields=[]).struct.unnest())576assert empty_df.shape == (0, 0)577578579def test_sort() -> None:580a = pl.Series("a", [2, 1, 3])581assert_series_equal(a.sort(), pl.Series("a", [1, 2, 3]))582assert_series_equal(a.sort(descending=True), pl.Series("a", [3, 2, 1]))583584585def test_rechunk() -> None:586a = pl.Series("a", [1, 2, 3])587b = pl.Series("b", [4, 5, 6])588a.append(b)589assert a.n_chunks() == 2590assert a.rechunk(in_place=False).n_chunks() == 1591a.rechunk(in_place=True)592assert a.n_chunks() == 1593594595def test_indexing() -> None:596a = pl.Series("a", [1, 2, None])597assert a[1] == 2598assert a[2] is None599b = pl.Series("b", [True, False])600assert b[0]601assert not b[1]602a = pl.Series("a", ["a", None])603assert a[0] == "a"604assert a[1] is None605a = pl.Series("a", [0.1, None])606assert a[0] == 0.1607assert a[1] is 
None608609610def test_arrow() -> None:611a = pl.Series("a", [1, 2, 3, None])612out = a.to_arrow()613assert out == pa.array([1, 2, 3, None])614615b = pl.Series("b", [1.0, 2.0, 3.0, None])616out = b.to_arrow()617assert out == pa.array([1.0, 2.0, 3.0, None])618619c = pl.Series("c", ["A", "BB", "CCC", None])620out = c.to_arrow()621assert out == pa.array(["A", "BB", "CCC", None], type=pa.large_string())622assert_series_equal(pl.from_arrow(out), c.rename("")) # type: ignore[arg-type]623624out = c.to_frame().to_arrow()["c"]625assert isinstance(out, (pa.Array, pa.ChunkedArray))626assert_series_equal(pl.from_arrow(out), c) # type: ignore[arg-type]627assert_series_equal(pl.from_arrow(out, schema=["x"]), c.rename("x")) # type: ignore[arg-type]628629d = pl.Series("d", [None, None, None], pl.Null)630out = d.to_arrow()631assert out == pa.nulls(3)632633s = cast(634"pl.Series",635pl.from_arrow(pa.array([["foo"], ["foo", "bar"]], pa.list_(pa.utf8()))),636)637assert s.dtype == pl.List638639640def test_arrow_cat() -> None:641# categorical dtype tests (including various forms of empty pyarrow array)642arr0 = pa.array(["foo", "bar"], pa.dictionary(pa.int32(), pa.utf8()))643assert_series_equal(644pl.Series("arr", ["foo", "bar"], pl.Categorical), pl.Series("arr", arr0)645)646arr1 = pa.array(["xxx", "xxx", None, "yyy"]).dictionary_encode()647arr2 = pa.chunked_array([], arr1.type)648arr3 = pa.array([], arr1.type)649arr4 = pa.array([]).dictionary_encode()650651assert_series_equal(652pl.Series("arr", ["xxx", "xxx", None, "yyy"], dtype=pl.Categorical),653pl.Series("arr", arr1),654)655for arr in (arr2, arr3):656assert_series_equal(657pl.Series("arr", [], dtype=pl.Categorical), pl.Series("arr", arr)658)659assert_series_equal(pl.Series("arr", [], dtype=pl.Null), pl.Series("arr", arr4))660661662def test_pycapsule_interface() -> None:663a = pl.Series("a", [1, 2, 3, None])664out = pa.chunked_array(PyCapsuleStreamHolder(a))665out_arr = out.combine_chunks()666assert out_arr == pa.array([1, 2, 3, 
None])667668669def test_get() -> None:670a = pl.Series("a", [1, 2, 3])671pos_idxs = pl.Series("idxs", [2, 0, 1, 0], dtype=pl.Int8)672neg_and_pos_idxs = pl.Series(673"neg_and_pos_idxs", [-2, 1, 0, -1, 2, -3], dtype=pl.Int8674)675empty_idxs = pl.Series("idxs", [], dtype=pl.Int8)676empty_ints: list[int] = []677assert a[0] == 1678assert a[:2].to_list() == [1, 2]679assert a[range(1)].to_list() == [1]680assert a[range(0, 4, 2)].to_list() == [1, 3]681assert a[:0].to_list() == []682assert a[empty_ints].to_list() == []683assert a[neg_and_pos_idxs.to_list()].to_list() == [2, 2, 1, 3, 3, 1]684for dtype in (685pl.UInt8,686pl.UInt16,687pl.UInt32,688pl.UInt64,689pl.Int8,690pl.Int16,691pl.Int32,692pl.Int64,693):694assert a[pos_idxs.cast(dtype)].to_list() == [3, 1, 2, 1]695assert a[pos_idxs.cast(dtype).to_numpy()].to_list() == [3, 1, 2, 1]696assert a[empty_idxs.cast(dtype)].to_list() == []697assert a[empty_idxs.cast(dtype).to_numpy()].to_list() == []698699for dtype in (pl.Int8, pl.Int16, pl.Int32, pl.Int64):700nps = a[neg_and_pos_idxs.cast(dtype).to_numpy()]701assert nps.to_list() == [2, 2, 1, 3, 3, 1]702703704def test_set() -> None:705a = pl.Series("a", [True, False, True])706mask = pl.Series("msk", [True, False, True])707a[mask] = False708assert_series_equal(a, pl.Series("a", [False] * 3))709710711def test_set_value_as_list_fail() -> None:712# only allowed for numerical physical types713s = pl.Series("a", [1, 2, 3])714s[[0, 2]] = [4, 5]715assert s.to_list() == [4, 2, 5]716717# for other types it is not allowed718s = pl.Series("a", ["a", "b", "c"])719with pytest.raises(TypeError):720s[[0, 1]] = ["d", "e"]721722s = pl.Series("a", [True, False, False])723with pytest.raises(TypeError):724s[[0, 1]] = [True, False]725726727@pytest.mark.parametrize("key", [True, False, 1.0])728def test_set_invalid_key(key: Any) -> None:729s = pl.Series("a", [1, 2, 3])730with pytest.raises(TypeError):731s[key] = 1732733734@pytest.mark.parametrize(735"key",736[737pl.Series([False, True, 
True]),738pl.Series([1, 2], dtype=UInt32),739pl.Series([1, 2], dtype=UInt64),740],741)742def test_set_key_series(key: pl.Series) -> None:743"""Only UInt32/UInt64/bool are allowed."""744s = pl.Series("a", [1, 2, 3])745s[key] = 4746assert_series_equal(s, pl.Series("a", [1, 4, 4]))747748749def test_set_np_array_boolean_mask() -> None:750a = pl.Series("a", [1, 2, 3])751mask = np.array([True, False, True])752a[mask] = 4753assert_series_equal(a, pl.Series("a", [4, 2, 4]))754755756@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.uint32, np.uint64])757def test_set_np_array(dtype: Any) -> None:758a = pl.Series("a", [1, 2, 3])759idx = np.array([0, 2], dtype=dtype)760a[idx] = 4761assert_series_equal(a, pl.Series("a", [4, 2, 4]))762763764@pytest.mark.parametrize("idx", [[0, 2], (0, 2)])765def test_set_list_and_tuple(idx: list[int] | tuple[int]) -> None:766a = pl.Series("a", [1, 2, 3])767a[idx] = 4768assert_series_equal(a, pl.Series("a", [4, 2, 4]))769770771def test_init_nested_tuple() -> None:772s1 = pl.Series("s", (1, 2, 3))773assert s1.to_list() == [1, 2, 3]774775s2 = pl.Series("s", ((1, 2, 3),), dtype=pl.List(pl.UInt8))776assert s2.to_list() == [[1, 2, 3]]777assert s2.dtype == pl.List(pl.UInt8)778779s3 = pl.Series("s", ((1, 2, 3), (1, 2, 3)), dtype=pl.List(pl.Int32))780assert s3.to_list() == [[1, 2, 3], [1, 2, 3]]781assert s3.dtype == pl.List(pl.Int32)782783784def test_fill_null() -> None:785s = pl.Series("a", [1, 2, None])786assert_series_equal(s.fill_null(strategy="forward"), pl.Series("a", [1, 2, 2]))787assert_series_equal(s.fill_null(14), pl.Series("a", [1, 2, 14], dtype=Int64))788789a = pl.Series("a", [0.0, 1.0, None, 2.0, None, 3.0])790791assert a.fill_null(0).to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]792assert a.fill_null(strategy="zero").to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]793assert a.fill_null(strategy="max").to_list() == [0.0, 1.0, 3.0, 2.0, 3.0, 3.0]794assert a.fill_null(strategy="min").to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]795assert 
a.fill_null(strategy="one").to_list() == [0.0, 1.0, 1.0, 2.0, 1.0, 3.0]796assert a.fill_null(strategy="forward").to_list() == [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]797assert a.fill_null(strategy="backward").to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]798assert a.fill_null(strategy="mean").to_list() == [0.0, 1.0, 1.5, 2.0, 1.5, 3.0]799assert a.forward_fill().to_list() == [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]800assert a.backward_fill().to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]801802b = pl.Series("b", ["a", None, "c", None, "e"])803assert b.fill_null(strategy="min").to_list() == ["a", "a", "c", "a", "e"]804assert b.fill_null(strategy="max").to_list() == ["a", "e", "c", "e", "e"]805assert b.fill_null(strategy="zero").to_list() == ["a", "", "c", "", "e"]806assert b.fill_null(strategy="forward").to_list() == ["a", "a", "c", "c", "e"]807assert b.fill_null(strategy="backward").to_list() == ["a", "c", "c", "e", "e"]808809c = pl.Series("c", [b"a", None, b"c", None, b"e"])810assert c.fill_null(strategy="min").to_list() == [b"a", b"a", b"c", b"a", b"e"]811assert c.fill_null(strategy="max").to_list() == [b"a", b"e", b"c", b"e", b"e"]812assert c.fill_null(strategy="zero").to_list() == [b"a", b"", b"c", b"", b"e"]813assert c.fill_null(strategy="forward").to_list() == [b"a", b"a", b"c", b"c", b"e"]814assert c.fill_null(strategy="backward").to_list() == [b"a", b"c", b"c", b"e", b"e"]815816df = pl.DataFrame(817[818pl.Series("i32", [1, 2, None], dtype=pl.Int32),819pl.Series("i64", [1, 2, None], dtype=pl.Int64),820pl.Series("f32", [1, 2, None], dtype=pl.Float32),821pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),822pl.Series("str", ["a", "b", None], dtype=pl.String),823pl.Series("bool", [True, True, None], dtype=pl.Boolean),824]825)826827assert df.fill_null(0, matches_supertype=False).fill_null("bar").fill_null(828False829).to_dict(as_series=False) == {830"i32": [1, 2, None],831"i64": [1, 2, 0],832"f32": [1.0, 2.0, None],833"cat": ["a", "b", "bar"],834"str": ["a", "b", "bar"],835"bool": 
[True, True, False],836}837838assert df.fill_null(0, matches_supertype=True).fill_null("bar").fill_null(839False840).to_dict(as_series=False) == {841"i32": [1, 2, 0],842"i64": [1, 2, 0],843"f32": [1.0, 2.0, 0.0],844"cat": ["a", "b", "bar"],845"str": ["a", "b", "bar"],846"bool": [True, True, False],847}848df = pl.DataFrame({"a": [1, None, 2, None]})849850out = df.with_columns(851pl.col("a").cast(pl.UInt8).alias("u8"),852pl.col("a").cast(pl.UInt16).alias("u16"),853pl.col("a").cast(pl.UInt32).alias("u32"),854pl.col("a").cast(pl.UInt64).alias("u64"),855).fill_null(3)856857assert out.to_dict(as_series=False) == {858"a": [1, 3, 2, 3],859"u8": [1, 3, 2, 3],860"u16": [1, 3, 2, 3],861"u32": [1, 3, 2, 3],862"u64": [1, 3, 2, 3],863}864assert out.dtypes == [pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64]865866867def test_str_series_min_max_10674() -> None:868str_series = pl.Series("b", ["a", None, "c", None, "e"], dtype=pl.String)869assert str_series.min() == "a"870assert str_series.max() == "e"871assert str_series.sort(descending=False).min() == "a"872assert str_series.sort(descending=True).max() == "e"873874875def test_fill_nan() -> None:876nan = float("nan")877a = pl.Series("a", [1.0, nan, 2.0, nan, 3.0])878assert_series_equal(a.fill_nan(None), pl.Series("a", [1.0, None, 2.0, None, 3.0]))879assert_series_equal(a.fill_nan(0), pl.Series("a", [1.0, 0.0, 2.0, 0.0, 3.0]))880881882def test_map_elements() -> None:883a = pl.Series("a", [1, 2, None])884with pytest.warns(PolarsInefficientMapWarning):885b = a.map_elements(lambda x: x**2, return_dtype=pl.Int64)886assert list(b) == [1, 4, None]887888a = pl.Series("a", ["foo", "bar", None])889with pytest.warns(PolarsInefficientMapWarning):890b = a.map_elements(lambda x: x + "py", return_dtype=pl.String)891assert list(b) == ["foopy", "barpy", None]892893b = a.map_elements(lambda x: len(x), return_dtype=pl.Int32)894assert list(b) == [3, 3, None]895896b = a.map_elements(lambda x: len(x))897assert list(b) == [3, 3, None]898899# just 
check that it runs (somehow problem with conditional compilation)900a = pl.Series("a", [2, 2, 3]).cast(pl.Datetime)901a.map_elements(lambda x: x)902a = pl.Series("a", [2, 2, 3]).cast(pl.Date)903a.map_elements(lambda x: x)904905906def test_shape() -> None:907s = pl.Series([1, 2, 3])908assert s.shape == (3,)909910911@pytest.mark.parametrize("arrow_available", [True, False])912def test_create_list_series(913arrow_available: bool, plmonkeypatch: PlMonkeyPatch914) -> None:915plmonkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", arrow_available)916a = [[1, 2], None, [None, 3]]917s = pl.Series("", a)918assert s.to_list() == a919920921def test_iter() -> None:922s = pl.Series("", [1, 2, 3])923924itr = s.__iter__()925assert itr.__next__() == 1926assert itr.__next__() == 2927assert itr.__next__() == 3928assert sum(s) == 6929930931def test_empty() -> None:932a = pl.Series(dtype=pl.Int8)933assert a.dtype == pl.Int8934assert a.is_empty()935936a = pl.Series()937assert a.dtype == pl.Null938assert a.is_empty()939940a = pl.Series("name", [])941assert a.dtype == pl.Null942assert a.is_empty()943944a = pl.Series(values=(), dtype=pl.Int8)945assert a.dtype == pl.Int8946assert a.is_empty()947948assert_series_equal(pl.Series(), pl.Series())949assert_series_equal(950pl.Series(dtype=pl.Int32), pl.Series(dtype=pl.Int64), check_dtypes=False951)952953with pytest.raises(TypeError, match="ambiguous"):954not pl.Series()955956957def test_round() -> None:958a = pl.Series("f", [1.003, 2.003])959b = a.round(2)960assert b.to_list() == [1.00, 2.00]961962b = a.round()963assert b.to_list() == [1.0, 2.0]964965966def test_round_int() -> None:967s = pl.Series([1, 2, 3])968assert_series_equal(s, s.round())969970971@pytest.mark.parametrize(972("series", "digits", "expected_result"),973[974pytest.param(pl.Series([1.234, 0.1234]), 2, pl.Series([1.2, 0.12]), id="f64"),975pytest.param(976pl.Series([1.234, 0.1234]).cast(pl.Float32),9772,978pl.Series([1.2, 
0.12]).cast(pl.Float32),979id="f32",980),981pytest.param(pl.Series([123400, 1234]), 2, pl.Series([120000, 1200]), id="i64"),982pytest.param(983pl.Series([123400, 1234]).cast(pl.Int32),9842,985pl.Series([120000, 1200]).cast(pl.Int32),986id="i32",987),988pytest.param(989pl.Series([0.0]), 2, pl.Series([0.0]), id="0 should remain the same"990),991],992)993def test_round_sig_figs(994series: pl.Series, digits: int, expected_result: pl.Series995) -> None:996result = series.round_sig_figs(digits=digits)997assert_series_equal(result, expected_result)9989991000def test_round_sig_figs_raises_exc() -> None:1001with pytest.raises(pl.exceptions.InvalidOperationError):1002pl.Series([1.234, 0.1234]).round_sig_figs(digits=0)100310041005def test_apply_list_out() -> None:1006s = pl.Series("count", [3, 2, 2])1007out = s.map_elements(lambda val: pl.repeat(val, val, eager=True))1008assert out[0].to_list() == [3, 3, 3]1009assert out[1].to_list() == [2, 2]1010assert out[2].to_list() == [2, 2]101110121013def test_reinterpret() -> None:1014s = pl.Series("a", [1, 1, 2], dtype=pl.UInt64)1015assert s.reinterpret(signed=True).dtype == pl.Int641016df = pl.DataFrame([s])1017assert df.select([pl.col("a").reinterpret(signed=True)])["a"].dtype == pl.Int64101810191020def test_mode() -> None:1021s = pl.Series("a", [1, 1, 2])1022assert s.mode().to_list() == [1]1023assert s.set_sorted().mode().to_list() == [1]10241025df = pl.DataFrame([s])1026assert df.select([pl.col("a").mode()])["a"].to_list() == [1]1027assert (1028pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().item()1029== "bar"1030)1031assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.01032assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b"10331034# sorted data1035assert set(pl.int_range(0, 3, eager=True).mode().to_list()) == {0, 1, 2}103610371038def test_diff() -> None:1039s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])10401041assert_series_equal(1042s.diff(),1043pl.Series("a", [None, 1, 1, -1, 0, 1, 
-3]),1044)1045assert_series_equal(1046s.diff(null_behavior="drop"),1047pl.Series("a", [1, 1, -1, 0, 1, -3]),1048)104910501051def test_diff_negative() -> None:1052s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])10531054assert_series_equal(1055s.diff(-1),1056pl.Series("a", [-1, -1, 1, 0, -1, 3, None]),1057)1058assert_series_equal(1059s.diff(-1, null_behavior="drop"),1060pl.Series("a", [-1, -1, 1, 0, -1, 3]),1061)106210631064def test_skew() -> None:1065s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])10661067assert s.skew(bias=True) == pytest.approx(-0.5953924651018018)1068assert s.skew(bias=False) == pytest.approx(-0.7717168360221258)10691070df = pl.DataFrame([s])1071assert np.isclose(1072df.select(pl.col("a").skew(bias=False))["a"][0], -0.77171683602212581073)107410751076def test_kurtosis() -> None:1077s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])1078expected = -0.640625000000000410791080assert s.kurtosis() == pytest.approx(expected)1081df = pl.DataFrame([s])1082assert np.isclose(df.select(pl.col("a").kurtosis())["a"][0], expected)108310841085def test_sqrt() -> None:1086s = pl.Series("a", [1, 2])1087assert_series_equal(s.sqrt(), pl.Series("a", [1.0, np.sqrt(2)]))1088df = pl.DataFrame([s])1089assert_series_equal(1090df.select(pl.col("a").sqrt())["a"], pl.Series("a", [1.0, np.sqrt(2)])1091)109210931094def test_cbrt() -> None:1095s = pl.Series("a", [1, 2])1096assert_series_equal(s.cbrt(), pl.Series("a", [1.0, np.cbrt(2)]))1097df = pl.DataFrame([s])1098assert_series_equal(1099df.select(pl.col("a").cbrt())["a"], pl.Series("a", [1.0, np.cbrt(2)])1100)110111021103def test_range() -> None:1104s1 = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])1105assert_series_equal(s1[2:5], s1[range(2, 5)])11061107ranges = [range(-2, 1), range(3), range(2, 8, 2)]11081109s2 = pl.Series("b", ranges, dtype=pl.List(pl.Int8))1110assert s2.to_list() == [[-2, -1, 0], [0, 1, 2], [2, 4, 6]]1111assert s2.dtype == pl.List(pl.Int8)1112assert s2.name == "b"11131114s3 = pl.Series("c", (ranges for _ in range(3)))1115assert 
s3.to_list() == [1116[[-2, -1, 0], [0, 1, 2], [2, 4, 6]],1117[[-2, -1, 0], [0, 1, 2], [2, 4, 6]],1118[[-2, -1, 0], [0, 1, 2], [2, 4, 6]],1119]1120assert s3.dtype == pl.List(pl.List(pl.Int64))11211122df = pl.DataFrame([s1])1123assert_frame_equal(df[2:5], df[range(2, 5)])112411251126def test_strict_cast() -> None:1127with pytest.raises(InvalidOperationError):1128pl.Series("a", [2**16]).cast(dtype=pl.Int16, strict=True)1129with pytest.raises(InvalidOperationError):1130pl.DataFrame({"a": [2**16]}).select([pl.col("a").cast(pl.Int16, strict=True)])113111321133def test_floor_divide() -> None:1134s = pl.Series("a", [1, 2, 3])1135assert_series_equal(s // 2, pl.Series("a", [0, 1, 1]))1136assert_series_equal(1137pl.DataFrame([s]).select(pl.col("a") // 2)["a"], pl.Series("a", [0, 1, 1])1138)113911401141def test_true_divide() -> None:1142s = pl.Series("a", [1, 2])1143assert_series_equal(s / 2, pl.Series("a", [0.5, 1.0]))1144assert_series_equal(1145pl.DataFrame([s]).select(pl.col("a") / 2)["a"], pl.Series("a", [0.5, 1.0])1146)11471148# rtruediv1149assert_series_equal(1150pl.DataFrame([s]).select(2 / pl.col("a"))["literal"],1151pl.Series("literal", [2.0, 1.0]),1152)11531154# https://github.com/pola-rs/polars/issues/13691155vals = [3000000000, 2, 3]1156foo = pl.Series(vals)1157assert_series_equal(foo / 1, pl.Series(vals, dtype=Float64))1158assert_series_equal(1159pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"],1160pl.Series("a", vals, dtype=Float64),1161)116211631164def test_bitwise() -> None:1165a = pl.Series("a", [1, 2, 3])1166b = pl.Series("b", [3, 4, 5])1167assert_series_equal(a & b, pl.Series("a", [1, 0, 1]))1168assert_series_equal(a | b, pl.Series("a", [3, 6, 7]))1169assert_series_equal(a ^ b, pl.Series("a", [2, 6, 6]))11701171df = pl.DataFrame([a, b])1172out = df.select(1173(pl.col("a") & pl.col("b")).alias("and"),1174(pl.col("a") | pl.col("b")).alias("or"),1175(pl.col("a") ^ pl.col("b")).alias("xor"),1176)1177assert_series_equal(out["and"], pl.Series("and", [1, 0, 
1]))1178assert_series_equal(out["or"], pl.Series("or", [3, 6, 7]))1179assert_series_equal(out["xor"], pl.Series("xor", [2, 6, 6]))11801181# ensure mistaken use of logical 'and'/'or' raises an exception1182with pytest.raises(TypeError, match="ambiguous"):1183a and b # type: ignore[redundant-expr]11841185with pytest.raises(TypeError, match="ambiguous"):1186a or b # type: ignore[redundant-expr]118711881189def test_from_generator_or_iterable() -> None:1190# generator function1191def gen(n: int) -> Iterator[int]:1192yield from range(n)11931194# iterable object1195class Data:1196def __init__(self, n: int) -> None:1197self._n = n11981199def __iter__(self) -> Iterator[int]:1200yield from gen(self._n)12011202expected = pl.Series("s", range(10))1203assert expected.dtype == pl.Int6412041205for generated_series in (1206pl.Series("s", values=gen(10)),1207pl.Series("s", values=Data(10)),1208pl.Series("s", values=(x for x in gen(10))),1209):1210assert_series_equal(expected, generated_series)12111212# test 'iterable_to_pyseries' directly to validate 'chunk_size' behaviour1213ps1 = iterable_to_pyseries("s", gen(10), dtype=pl.UInt8)1214ps2 = iterable_to_pyseries("s", gen(10), dtype=pl.UInt8, chunk_size=3)1215ps3 = iterable_to_pyseries("s", Data(10), dtype=pl.UInt8, chunk_size=6)12161217expected = pl.Series("s", range(10), dtype=pl.UInt8)1218assert expected.dtype == pl.UInt812191220for ps in (ps1, ps2, ps3):1221generated_series = pl.Series("s")1222generated_series._s = ps1223assert_series_equal(expected, generated_series)12241225# empty generator1226assert_series_equal(pl.Series("s", []), pl.Series("s", values=gen(0)))122712281229def test_from_sequences(plmonkeypatch: PlMonkeyPatch) -> None:1230# test int, str, bool, flt1231values = [1232[[1], [None, 3]],1233[["foo"], [None, "bar"]],1234[[True], [None, False]],1235[[1.0], [None, 3.0]],1236]12371238for vals in values:1239plmonkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", False)1240a = pl.Series("a", 
vals)1241plmonkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", True)1242b = pl.Series("a", vals)1243assert_series_equal(a, b)1244assert a.to_list() == vals124512461247def test_comparisons_int_series_to_float() -> None:1248srs_int = pl.Series([1, 2, 3, 4])12491250assert_series_equal(srs_int - 1.0, pl.Series([0.0, 1.0, 2.0, 3.0]))1251assert_series_equal(srs_int + 1.0, pl.Series([2.0, 3.0, 4.0, 5.0]))1252assert_series_equal(srs_int * 2.0, pl.Series([2.0, 4.0, 6.0, 8.0]))1253assert_series_equal(srs_int / 2.0, pl.Series([0.5, 1.0, 1.5, 2.0]))1254assert_series_equal(srs_int % 2.0, pl.Series([1.0, 0.0, 1.0, 0.0]))1255assert_series_equal(4.0 % srs_int, pl.Series([0.0, 0.0, 1.0, 0.0]))12561257assert_series_equal(srs_int // 2.0, pl.Series([0.0, 1.0, 1.0, 2.0]))1258assert_series_equal(srs_int < 3.0, pl.Series([True, True, False, False]))1259assert_series_equal(srs_int <= 3.0, pl.Series([True, True, True, False]))1260assert_series_equal(srs_int > 3.0, pl.Series([False, False, False, True]))1261assert_series_equal(srs_int >= 3.0, pl.Series([False, False, True, True]))1262assert_series_equal(srs_int == 3.0, pl.Series([False, False, True, False]))1263assert_series_equal(srs_int - True, pl.Series([0, 1, 2, 3]))126412651266def test_comparisons_int_series_to_float_scalar() -> None:1267srs_int = pl.Series([1, 2, 3, 4])12681269assert_series_equal(srs_int < 1.5, pl.Series([True, False, False, False]))1270assert_series_equal(srs_int > 1.5, pl.Series([False, True, True, True]))127112721273def test_comparisons_datetime_series_to_date_scalar() -> None:1274srs_date = pl.Series([date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 3)])1275dt = datetime(2023, 1, 1, 12, 0, 0)12761277assert_series_equal(srs_date < dt, pl.Series([True, False, False]))1278assert_series_equal(srs_date > dt, pl.Series([False, True, True]))127912801281def test_comparisons_float_series_to_int() -> None:1282srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])12831284assert_series_equal(srs_float - 1, pl.Series([0.0, 1.0, 
2.0, 3.0]))1285assert_series_equal(srs_float + 1, pl.Series([2.0, 3.0, 4.0, 5.0]))1286assert_series_equal(srs_float * 2, pl.Series([2.0, 4.0, 6.0, 8.0]))1287assert_series_equal(srs_float / 2, pl.Series([0.5, 1.0, 1.5, 2.0]))1288assert_series_equal(srs_float % 2, pl.Series([1.0, 0.0, 1.0, 0.0]))1289assert_series_equal(4 % srs_float, pl.Series([0.0, 0.0, 1.0, 0.0]))12901291assert_series_equal(srs_float // 2, pl.Series([0.0, 1.0, 1.0, 2.0]))1292assert_series_equal(srs_float < 3, pl.Series([True, True, False, False]))1293assert_series_equal(srs_float <= 3, pl.Series([True, True, True, False]))1294assert_series_equal(srs_float > 3, pl.Series([False, False, False, True]))1295assert_series_equal(srs_float >= 3, pl.Series([False, False, True, True]))1296assert_series_equal(srs_float == 3, pl.Series([False, False, True, False]))1297assert_series_equal(srs_float - True, pl.Series([0.0, 1.0, 2.0, 3.0]))129812991300def test_comparisons_bool_series_to_int() -> None:1301srs_bool = pl.Series([True, False])13021303# (native bool comparison should work...)1304for t, f in ((True, False), (False, True)):1305assert list(srs_bool == t) == list(srs_bool != f) == [t, f]13061307# TODO: do we want this to work?1308assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))1309match = (1310r"cannot do arithmetic with Series of dtype: Boolean"1311r" and argument of type: 'bool'"1312)1313with pytest.raises(TypeError, match=match):1314srs_bool - 11315with pytest.raises(TypeError, match=match):1316srs_bool + 11317match = (1318r"cannot do arithmetic with Series of dtype: Boolean"1319r" and argument of type: 'bool'"1320)1321with pytest.raises(TypeError, match=match):1322srs_bool % 21323with pytest.raises(TypeError, match=match):1324srs_bool * 113251326from operator import ge, gt, le, lt13271328for op in (ge, gt, le, lt):1329for scalar in (0, 1.0, True, False):1330op_str = op.__name__.replace("e", "t_eq")1331with pytest.raises(1332NotImplementedError,1333match=rf"Series of type 
Boolean does not have {op_str} operator",1334):1335op(srs_bool, scalar)133613371338@pytest.mark.parametrize(1339("values", "compare_with", "compares_equal"),1340[1341(1342[date(1999, 12, 31), date(2021, 1, 31)],1343date(2021, 1, 31),1344[False, True],1345),1346(1347[datetime(2021, 1, 1, 12, 0, 0), datetime(2021, 1, 2, 12, 0, 0)],1348datetime(2021, 1, 1, 12, 0, 0),1349[True, False],1350),1351(1352[timedelta(days=1), timedelta(days=2)],1353timedelta(days=1),1354[True, False],1355),1356],1357)1358def test_temporal_comparison(1359values: list[Any], compare_with: Any, compares_equal: list[bool]1360) -> None:1361assert_series_equal(1362pl.Series(values) == compare_with,1363pl.Series(compares_equal, dtype=pl.Boolean),1364)136513661367@pytest.mark.parametrize(1368("drop_nulls", "drop_first"),1369[1370(False, False),1371(False, True),1372(True, False),1373(True, True),1374],1375)1376def test_to_dummies_with_nulls(drop_nulls: bool, drop_first: bool) -> None:1377s = pl.Series("s", [None, "a", "a", None, "b", "c"])1378expected = pl.DataFrame(1379{1380"s_a": [0, 1, 1, 0, 0, 0],1381"s_b": [0, 0, 0, 0, 1, 0],1382"s_c": [0, 0, 0, 0, 0, 1],1383"s_null": [1, 0, 0, 1, 0, 0],1384}1385).cast(pl.UInt8)13861387if drop_nulls:1388expected = expected.drop("s_null")1389if drop_first:1390expected = expected.drop("s_a")13911392result = s.to_dummies(drop_nulls=drop_nulls, drop_first=drop_first)1393assert_frame_equal(result, expected)139413951396@pytest.mark.parametrize(1397("drop_nulls", "drop_first"),1398[1399(False, False),1400(False, True),1401(True, False),1402(True, True),1403],1404)1405def test_to_dummies_no_nulls(drop_nulls: bool, drop_first: bool) -> None:1406s = pl.Series("s", ["a", "a", "b", "c"])1407expected = pl.DataFrame(1408{1409"s_a": [1, 1, 0, 0],1410"s_b": [0, 0, 1, 0],1411"s_c": [0, 0, 0, 1],1412}1413).cast(pl.UInt8)14141415if drop_first:1416expected = expected.drop("s_a")14171418result = s.to_dummies(drop_nulls=drop_nulls, drop_first=drop_first)1419assert_frame_equal(result, 
expected)142014211422def test_to_dummies_null_clash_19096() -> None:1423with pytest.raises(1424DuplicateError, match="column with name '_null' has more than one occurrence"1425):1426pl.Series([None, "null"]).to_dummies()142714281429def test_chunk_lengths() -> None:1430s = pl.Series("a", [1, 2, 2, 3])1431# this is a Series with one chunk, of length 41432assert s.n_chunks() == 11433assert s.chunk_lengths() == [4]143414351436def test_limit() -> None:1437s = pl.Series("a", [1, 2, 3])1438assert_series_equal(s.limit(2), pl.Series("a", [1, 2]))143914401441def test_filter() -> None:1442s = pl.Series("a", [1, 2, 3])1443mask = pl.Series("", [True, False, True])14441445assert_series_equal(s.filter(mask), pl.Series("a", [1, 3]))1446assert_series_equal(s.filter([True, False, True]), pl.Series("a", [1, 3]))1447assert_series_equal(s.filter(np.array([True, False, True])), pl.Series("a", [1, 3]))14481449with pytest.raises(RuntimeError, match="Expected a boolean mask"):1450s.filter(np.array([1, 0, 1]))145114521453def test_gather_every() -> None:1454s = pl.Series("a", [1, 2, 3, 4])1455assert_series_equal(s.gather_every(2), pl.Series("a", [1, 3]))1456assert_series_equal(s.gather_every(2, offset=1), pl.Series("a", [2, 4]))145714581459def test_arg_sort() -> None:1460s = pl.Series("a", [5, 3, 4, 1, 2])1461expected = pl.Series("a", [3, 4, 1, 2, 0], dtype=pl.get_index_type())14621463assert_series_equal(s.arg_sort(), expected)14641465expected_descending = pl.Series("a", [0, 2, 1, 4, 3], dtype=pl.get_index_type())1466assert_series_equal(s.arg_sort(descending=True), expected_descending)146714681469@pytest.mark.parametrize(1470("series", "argmin", "argmax"),1471[1472# Numeric1473(pl.Series([5, 3, 4, 1, 2]), 3, 0),1474(pl.Series([None, 5, 1]), 2, 1),1475(pl.Series([float("nan"), 3.0, 5.0]), 1, 2),1476(pl.Series([None, float("nan"), 3.0, 5.0]), 2, 3),1477# Boolean1478(pl.Series([True, False]), 1, 0),1479(pl.Series([True, True]), 0, 0),1480(pl.Series([False, False]), 0, 0),1481(pl.Series([None, 
True, False, True]), 2, 1),1482(pl.Series([None, True, True]), 1, 1),1483(pl.Series([None, False, False]), 1, 1),1484# String1485(pl.Series(["a", "c", "b"]), 0, 1),1486(pl.Series([None, "a", None, "b"]), 1, 3),1487# Binary1488(pl.Series([b"a", b"c", b"b"]), 0, 1),1489(pl.Series([None, b"a", None, b"b"]), 1, 3),1490# Decimal1491(pl.Series([Decimal("1.1"), Decimal("2.2"), Decimal("0.5")]), 2, 1),1492(pl.Series([None, Decimal("1.1"), None, Decimal("2.2")]), 1, 3),1493# Categorical1494(pl.Series(["c", "b", "a"], dtype=pl.Categorical()), 2, 0),1495(pl.Series("s", [None, "c", "b", None, "a"], pl.Categorical()), 4, 1),1496],1497)1498def test_arg_min_arg_max(series: pl.Series, argmin: int, argmax: int) -> None:1499assert series.arg_min() == argmin, (1500f"values: {series.to_list()}, expected {argmin} got {series.arg_min()}"1501)1502assert series.arg_max() == argmax, (1503f"values: {series.to_list()}, expected {argmax} got {series.arg_max()}"1504)150515061507@pytest.mark.parametrize(1508("series"),1509[1510# All nulls1511pl.Series([None, None], dtype=pl.Int32),1512pl.Series([None, None], dtype=pl.Boolean),1513pl.Series([None, None], dtype=pl.String),1514pl.Series([None, None], dtype=pl.Categorical),1515pl.Series([None, None], dtype=pl.Categorical()),1516# Empty Series1517pl.Series([], dtype=pl.Int32),1518pl.Series([], dtype=pl.Boolean),1519pl.Series([], dtype=pl.String),1520pl.Series([], dtype=pl.Categorical),1521],1522)1523def test_arg_min_arg_max_all_nulls_or_empty(series: pl.Series) -> None:1524assert series.arg_min() is None1525assert series.arg_max() is None152615271528def test_arg_min_and_arg_max_sorted() -> None:1529# test ascending and descending numerical series1530s = pl.Series([None, 1, 2, 3, 4, 5])1531s.sort(in_place=True) # set ascending sorted flag1532assert s.flags == {"SORTED_ASC": True, "SORTED_DESC": False}1533assert s.arg_min() == 11534assert s.arg_max() == 51535s = pl.Series([None, 5, 4, 3, 2, 1])1536s.sort(descending=True, in_place=True) # set descing 
sorted flag1537assert s.flags == {"SORTED_ASC": False, "SORTED_DESC": True}1538assert s.arg_min() == 51539assert s.arg_max() == 115401541# test ascending and descending str series1542s = pl.Series([None, "a", "b", "c", "d", "e"])1543s.sort(in_place=True) # set ascending sorted flag1544assert s.flags == {"SORTED_ASC": True, "SORTED_DESC": False}1545assert s.arg_min() == 11546assert s.arg_max() == 51547s = pl.Series([None, "e", "d", "c", "b", "a"])1548s.sort(descending=True, in_place=True) # set descing sorted flag1549assert s.flags == {"SORTED_ASC": False, "SORTED_DESC": True}1550assert s.arg_min() == 51551assert s.arg_max() == 1155215531554def test_is_null_is_not_null() -> None:1555s = pl.Series("a", [1.0, 2.0, 3.0, None])1556assert_series_equal(s.is_null(), pl.Series("a", [False, False, False, True]))1557assert_series_equal(s.is_not_null(), pl.Series("a", [True, True, True, False]))155815591560def test_is_finite_is_infinite() -> None:1561s = pl.Series("a", [1.0, 2.0, np.inf])1562assert_series_equal(s.is_finite(), pl.Series("a", [True, True, False]))1563assert_series_equal(s.is_infinite(), pl.Series("a", [False, False, True]))156415651566@pytest.mark.parametrize("float_type", [pl.Float32, pl.Float64])1567def test_is_nan_is_not_nan(float_type: PolarsDataType) -> None:1568s = pl.Series([1.0, np.nan, None], dtype=float_type)15691570assert_series_equal(s.is_nan(), pl.Series([False, True, None]))1571assert_series_equal(s.is_not_nan(), pl.Series([True, False, None]))1572assert_series_equal(s.fill_nan(2.0), pl.Series([1.0, 2.0, None], dtype=float_type))1573assert_series_equal(s.drop_nans(), pl.Series([1.0, None], dtype=float_type))157415751576def test_float_methods_on_ints() -> None:1577# these float-specific methods work on non-float numeric types1578s = pl.Series([1, None], dtype=pl.Int32)1579assert_series_equal(s.is_finite(), pl.Series([True, None]))1580assert_series_equal(s.is_infinite(), pl.Series([False, None]))1581assert_series_equal(s.is_nan(), pl.Series([False, 
None]))1582assert_series_equal(s.is_not_nan(), pl.Series([True, None]))158315841585def test_dot() -> None:1586s1 = pl.Series("a", [1, 2, 3])1587s2 = pl.Series("b", [4.0, 5.0, 6.0])15881589assert np.array([1, 2, 3]) @ np.array([4, 5, 6]) == 3215901591for dot_result in (1592s1.dot(s2),1593s1 @ s2,1594[1, 2, 3] @ s2,1595s1 @ np.array([4, 5, 6]),1596):1597assert dot_result == 3215981599with pytest.raises(ShapeError, match="length mismatch"):1600s1 @ [4, 5, 6, 7, 8]160116021603@pytest.mark.parametrize(1604("dtype"),1605[pl.Int8, pl.Int16, pl.Int32, pl.Float32, pl.Float64],1606)1607def test_peak_max_peak_min(dtype: pl.DataType) -> None:1608s = pl.Series("a", [4, 1, 3, 2, 5], dtype=dtype)16091610result = s.peak_min()1611expected = pl.Series("a", [False, True, False, True, False])1612assert_series_equal(result, expected)16131614result = s.peak_max()1615expected = pl.Series("a", [True, False, True, False, True])1616assert_series_equal(result, expected)161716181619def test_peak_max_peak_min_bool() -> None:1620s = pl.Series("a", [False, True, False, True, True, False], dtype=pl.Boolean)1621result = s.peak_min()1622expected = pl.Series("a", [False, False, True, False, False, False])1623assert_series_equal(result, expected)16241625result = s.peak_max()1626expected = pl.Series("a", [False, True, False, False, False, False])1627assert_series_equal(result, expected)162816291630def test_shrink_to_fit() -> None:1631s = pl.Series("a", [4, 1, 3, 2, 5])1632sf = s.shrink_to_fit(in_place=True)1633assert sf is s16341635s = pl.Series("a", [4, 1, 3, 2, 5])1636sf = s.shrink_to_fit(in_place=False)1637assert s is not sf163816391640@pytest.mark.parametrize("unit", ["ns", "us", "ms"])1641def test_cast_datetime_to_time(unit: TimeUnit) -> None:1642a = pl.Series(1643"a",1644[1645datetime(2022, 9, 7, 0, 0),1646datetime(2022, 9, 6, 12, 0),1647datetime(2022, 9, 7, 23, 59, 59),1648datetime(2022, 9, 7, 23, 59, 59, 201),1649],1650dtype=Datetime(unit),1651)1652if unit == "ms":1653# NOTE: microseconds are 
lost for `unit=ms`1654expected_values = [time(0, 0), time(12, 0), time(23, 59, 59), time(23, 59, 59)]1655else:1656expected_values = [1657time(0, 0),1658time(12, 0),1659time(23, 59, 59),1660time(23, 59, 59, 201),1661]1662expected = pl.Series("a", expected_values)1663assert_series_equal(a.cast(Time), expected)166416651666def test_init_categorical() -> None:1667for values in [[None], ["foo", "bar"], [None, "foo", "bar"]]:1668expected = pl.Series("a", values, dtype=pl.String).cast(pl.Categorical)1669a = pl.Series("a", values, dtype=pl.Categorical)1670assert_series_equal(a, expected)167116721673def test_iter_nested_list() -> None:1674elems = list(pl.Series("s", [[1, 2], [3, 4]]))1675assert_series_equal(elems[0], pl.Series([1, 2]))1676assert_series_equal(elems[1], pl.Series([3, 4]))16771678rev_elems = list(reversed(pl.Series("s", [[1, 2], [3, 4]])))1679assert_series_equal(rev_elems[0], pl.Series([3, 4]))1680assert_series_equal(rev_elems[1], pl.Series([1, 2]))168116821683def test_iter_nested_struct() -> None:1684# note: this feels inconsistent with the above test for nested list, but1685# let's ensure the behaviour is codified before potentially modifying...1686elems = list(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}]))1687assert elems[0] == {"a": 1, "b": 2}1688assert elems[1] == {"a": 3, "b": 4}16891690rev_elems = list(reversed(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}])))1691assert rev_elems[0] == {"a": 3, "b": 4}1692assert rev_elems[1] == {"a": 1, "b": 2}169316941695@pytest.mark.parametrize(1696"dtype",1697[1698pl.UInt8,1699pl.Float32,1700pl.Int32,1701pl.Boolean,1702pl.List(pl.String),1703pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]),1704],1705)1706def test_nested_list_types_preserved(dtype: pl.DataType) -> None:1707srs = pl.Series([pl.Series([], dtype=dtype) for _ in range(5)])1708for srs_nested in srs:1709assert srs_nested.dtype == dtype171017111712def test_to_physical() -> None:1713# casting an int result in an int1714s = 
pl.Series("a", [1, 2, 3])1715assert_series_equal(s.to_physical(), s)17161717# casting a date results in an Int321718s = pl.Series("a", [date(2020, 1, 1)] * 3)1719expected = pl.Series("a", [18262] * 3, dtype=Int32)1720assert_series_equal(s.to_physical(), expected)17211722# casting a categorical results in a UInt321723s = pl.Series(["cat1"]).cast(pl.Categorical)1724assert s.to_physical().dtype == pl.UInt3217251726# casting a small enum results in a UInt81727s = pl.Series(["cat1"]).cast(pl.Enum(["cat1"]))1728assert s.to_physical().dtype == pl.UInt817291730# casting a List(Categorical) results in a List(UInt32)1731s = pl.Series([["cat1"]]).cast(pl.List(pl.Categorical))1732assert s.to_physical().dtype == pl.List(pl.UInt32)17331734# casting a List(Enum) with a small enum results in a List(UInt8)1735s = pl.Series(["cat1"]).cast(pl.List(pl.Enum(["cat1"])))1736assert s.to_physical().dtype == pl.List(pl.UInt8)173717381739def test_to_physical_rechunked_21285() -> None:1740# A series with multiple chunks, dtype is array or list of structs with a1741# null field (causes rechunking) and a field with a different physical and1742# logical repr (causes the full body of `to_physical_repr` to run).1743arr_dtype = pl.Array(pl.Struct({"f0": pl.Time, "f1": pl.Null}), shape=(1,))1744s = pl.Series("a", [None], arr_dtype) # content doesn't matter1745s = s.append(s)1746expected_arr_dtype = pl.Array(pl.Struct({"f0": Int64, "f1": pl.Null}), shape=(1,))1747expected = pl.Series("a", [None, None], expected_arr_dtype)1748assert_series_equal(s.to_physical(), expected)17491750list_dtype = pl.List(pl.Struct({"f0": pl.Time, "f1": pl.Null}))1751s = pl.Series("a", [None], list_dtype) # content doesn't matter1752s = s.append(s)1753expected_list_dtype = pl.List(pl.Struct({"f0": Int64, "f1": pl.Null}))1754expected = pl.Series("a", [None, None], expected_list_dtype)1755assert_series_equal(s.to_physical(), expected)175617571758def test_is_between_datetime() -> None:1759s = pl.Series("a", [datetime(2020, 1, 
1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)])1760start = datetime(2020, 1, 1, 12, 0, 0)1761end = datetime(2020, 1, 1, 23, 0, 0)1762expected = pl.Series("a", [False, True])17631764# only on the expression api1765result = s.to_frame().with_columns(pl.col("*").is_between(start, end)).to_series()1766assert_series_equal(result, expected)176717681769@pytest.mark.parametrize(1770"f",1771[1772"sin",1773"cos",1774"tan",1775"arcsin",1776"arccos",1777"arctan",1778"sinh",1779"cosh",1780"tanh",1781"arcsinh",1782"arccosh",1783"arctanh",1784],1785)1786@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")1787def test_trigonometric(f: str) -> None:1788s = pl.Series("a", [0.0, math.pi, None, math.nan])1789expected = (1790pl.Series("a", getattr(np, f)(s.to_numpy()))1791.to_frame()1792.with_columns(pl.when(s.is_null()).then(None).otherwise(pl.col("a")).alias("a"))1793.to_series()1794)1795result = getattr(s, f)()1796assert_series_equal(result, expected)179717981799@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")1800def test_trigonometric_cot() -> None:1801# cotangent is not available in numpy...1802s = pl.Series("a", [0.0, math.pi, None, math.nan])1803expected = pl.Series("a", [math.inf, -8.1656e15, None, math.nan])1804assert_series_equal(s.cot(), expected)180518061807def test_trigonometric_invalid_input() -> None:1808# String1809s = pl.Series("a", ["1", "2", "3"])1810with pytest.raises(InvalidOperationError):1811s.sin()18121813# Date1814s = pl.Series("a", [date(1990, 2, 28), date(2022, 7, 26)])1815with pytest.raises(InvalidOperationError):1816s.cosh()181718181819@pytest.mark.parametrize("dtype", INTEGER_DTYPES)1820def test_product_ints(dtype: PolarsDataType) -> None:1821a = pl.Series("a", [1, 2, 3], dtype=dtype)1822out = a.product()1823assert out == 61824a = pl.Series("a", [1, 2, None], dtype=dtype)1825out = a.product()1826assert out == 21827a = pl.Series("a", [None, 2, 3], dtype=dtype)1828out = a.product()1829assert out == 
6183018311832@pytest.mark.parametrize("dtype", FLOAT_DTYPES)1833def test_product_floats(dtype: PolarsDataType) -> None:1834a = pl.Series("a", [], dtype=dtype)1835out = a.product()1836assert out == 11837a = pl.Series("a", [None, None], dtype=dtype)1838out = a.product()1839assert out == 11840a = pl.Series("a", [3.0, None, float("nan")], dtype=dtype)1841out = a.product()1842assert math.isnan(out)184318441845def test_ceil() -> None:1846s = pl.Series([1.8, 1.2, 3.0])1847expected = pl.Series([2.0, 2.0, 3.0])1848assert_series_equal(s.ceil(), expected)184918501851def test_duration_arithmetic() -> None:1852# apply some basic duration math to series1853s = pl.Series([datetime(2022, 1, 1, 10, 20, 30), datetime(2022, 1, 2, 20, 40, 50)])1854d1 = pl.duration(days=5, microseconds=123456)1855d2 = timedelta(days=5, microseconds=123456)18561857expected_values = [1858datetime(2022, 1, 6, 10, 20, 30, 123456),1859datetime(2022, 1, 7, 20, 40, 50, 123456),1860]1861for d in (d1, d2):1862df1 = pl.select((s + d).alias("d_offset"))1863df2 = pl.select((d + s).alias("d_offset"))1864assert df1["d_offset"].to_list() == expected_values1865assert_series_equal(df1["d_offset"], df2["d_offset"])186618671868def test_mean_overflow() -> None:1869arr = np.array([255] * (1 << 17), dtype="int16")1870assert arr.mean() == 255.0187118721873def test_sign() -> None:1874# Integers1875a = pl.Series("a", [-9, -0, 0, 4, None])1876expected = pl.Series("a", [-1, 0, 0, 1, None])1877assert_series_equal(a.sign(), expected)18781879# Floats1880a = pl.Series("a", [-9.0, -0.0, 0.0, 4.0, float("nan"), None])1881expected = pl.Series("a", [-1.0, 0.0, 0.0, 1.0, float("nan"), None])1882assert_series_equal(a.sign(), expected)18831884# Decimal1885s = pl.Series("a", [1, -1, 10, -10])1886for scale in [0, 1, 2, 3, 7, 16, 20, 30]:1887dtype = pl.Decimal(scale=scale)1888assert_series_equal(s.sign().cast(dtype), s.cast(dtype).sign())18891890s = pl.Series("a", ["1.00", "20.00", "-1", "0", "-7"], dtype)1891assert_series_equal(1892s.sign(), 
pl.Series("a", ["1", "1", "-1", "0", "-1"], dtype)1893)18941895# Invalid input1896a = pl.Series("a", [date(1950, 2, 1), date(1970, 1, 1), date(2022, 12, 12), None])1897with pytest.raises(InvalidOperationError):1898a.sign()189919001901def test_exp() -> None:1902s = pl.Series("a", [0.1, 0.01, None])1903expected = pl.Series("a", [1.1051709180756477, 1.010050167084168, None])1904assert_series_equal(s.exp(), expected)1905# test if we can run on empty series as well.1906assert s[:0].exp().to_list() == []190719081909def test_cumulative_eval() -> None:1910s = pl.Series("values", [1, 2, 3, 4, 5])19111912# evaluate expressions individually1913expr1 = pl.element().first()1914expr2 = pl.element().last() ** 219151916expected1 = pl.Series("values", [1, 1, 1, 1, 1])1917expected2 = pl.Series("values", [1, 4, 9, 16, 25])1918assert_series_equal(s.cumulative_eval(expr1), expected1)1919assert_series_equal(s.cumulative_eval(expr2), expected2)19201921# evaluate combined expressions and validate1922expr3 = expr1 - expr21923expected3 = pl.Series("values", [0, -3, -8, -15, -24])1924assert_series_equal(s.cumulative_eval(expr3), expected3)192519261927def test_first_last() -> None:1928# Ensure multiple chunks1929s1 = pl.Series("a", [None, None], dtype=pl.Int32)1930s2 = pl.Series("a", [None, 3, 4, None], dtype=pl.Int32)1931s3 = pl.Series("a", [None, None], dtype=pl.Int32)1932s = s1.append(s2).append(s3)1933assert s.first() is None1934assert s.first(ignore_nulls=True) == 31935assert s.last() is None1936assert s.last(ignore_nulls=True) == 4193719381939def test_clip() -> None:1940s = pl.Series("foo", [-50, 5, None, 50])1941assert s.clip(1, 10).to_list() == [1, 5, None, 10]194219431944def test_repr() -> None:1945s = pl.Series("ints", [1001, 2002, 3003])1946s_repr = repr(s)19471948assert "shape: (3,)" in s_repr1949assert "Series: 'ints' [i64]" in s_repr1950for n in s.to_list():1951assert str(n) in s_repr19521953class XSeries(pl.Series):1954"""Custom Series class."""19551956# check custom class name 
reflected in repr output1957x = XSeries("ints", [1001, 2002, 3003])1958x_repr = repr(x)19591960assert "shape: (3,)" in x_repr1961assert "XSeries: 'ints' [i64]" in x_repr1962assert "1001" in x_repr1963for n in x.to_list():1964assert str(n) in x_repr196519661967def test_repr_html(df: pl.DataFrame) -> None:1968# check it does not panic/error, and appears to contain a table1969html = pl.Series("misc", [123, 456, 789])._repr_html_()1970assert "<table" in html197119721973@pytest.mark.parametrize(1974("value", "time_unit", "exp", "exp_type"),1975[1976(197713285,1978"d",1979date(2006, 5, 17),1980pl.Date,1981),1982(19831147880044,1984"s",1985datetime(2006, 5, 17, 15, 34, 4),1986pl.Datetime("us"),1987),1988(19891147880044 * 1_000,1990"ms",1991datetime(2006, 5, 17, 15, 34, 4),1992pl.Datetime("us"),1993),1994(19951147880044 * 1_000_000,1996"us",1997datetime(2006, 5, 17, 15, 34, 4),1998pl.Datetime("us"),1999),2000(20011147880044 * 1_000_000_000,2002"ns",2003datetime(2006, 5, 17, 15, 34, 4),2004pl.Datetime("ns"),2005),2006],2007)2008def test_from_epoch_expr(2009value: int,2010time_unit: EpochTimeUnit,2011exp: date | datetime,2012exp_type: PolarsDataType,2013) -> None:2014s = pl.Series("timestamp", [value, None])2015result = pl.from_epoch(s, time_unit=time_unit)20162017expected = pl.Series("timestamp", [exp, None]).cast(exp_type)2018assert_series_equal(result, expected)201920202021def test_get_chunks() -> None:2022a = pl.Series("a", [1, 2])2023b = pl.Series("a", [3, 4])2024chunks = pl.concat([a, b], rechunk=False).get_chunks()2025assert_series_equal(chunks[0], a)2026assert_series_equal(chunks[1], b)202720282029def test_null_comparisons() -> None:2030s = pl.Series("s", [None, "str", "a"])2031assert (s.shift() == s).null_count() == 22032assert (s.shift() != s).null_count() == 2203320342035def test_min_max_agg_on_str() -> None:2036strings = ["b", "a", "x"]2037s = pl.Series(strings)2038assert (s.min(), s.max()) == ("a", "x")203920402041def test_min_max_full_nan_15058() -> None:2042s 
= pl.Series([float("nan")] * 2)2043assert all(x != x for x in [s.min(), s.max()])204420452046def test_is_between() -> None:2047s = pl.Series("num", [1, 2, None, 4, 5])2048assert s.is_between(2, 4).to_list() == [False, True, None, True, False]20492050s = pl.Series("num", [1, 2, None, 4, 5])2051assert s.is_between(2, 4, closed="left").to_list() == [2052False,2053True,2054None,2055False,2056False,2057]20582059s = pl.Series("num", [1, 2, None, 4, 5])2060assert s.is_between(2, 4, closed="right").to_list() == [2061False,2062False,2063None,2064True,2065False,2066]20672068s = pl.Series("num", [1, 2, None, 4, 5])2069assert s.is_between(pl.lit(2) / 2, pl.lit(4) * 2, closed="both").to_list() == [2070True,2071True,2072None,2073True,2074True,2075]20762077s = pl.Series("s", ["a", "b", "c", "d", "e"])2078assert s.is_between("b", "d").to_list() == [2079False,2080True,2081True,2082True,2083False,2084]208520862087@pytest.mark.parametrize(2088("dtype", "lower", "upper"),2089[2090(pl.Int8, -128, 127),2091(pl.UInt8, 0, 255),2092(pl.Int16, -32768, 32767),2093(pl.UInt16, 0, 65535),2094(pl.Int32, -2147483648, 2147483647),2095(pl.UInt32, 0, 4294967295),2096(pl.Int64, -9223372036854775808, 9223372036854775807),2097(pl.UInt64, 0, 18446744073709551615),2098(pl.Float32, float("-inf"), float("inf")),2099(pl.Float64, float("-inf"), float("inf")),2100],2101)2102def test_upper_lower_bounds(2103dtype: PolarsDataType, upper: int | float, lower: int | float2104) -> None:2105s = pl.Series("s", dtype=dtype)2106assert s.lower_bound().item() == lower2107assert s.upper_bound().item() == upper210821092110def test_numpy_series_arithmetic() -> None:2111sx = pl.Series(values=[1, 2])2112y = np.array([3.0, 4.0])21132114result_add1 = y + sx2115result_add2 = sx + y2116expected_add = pl.Series([4.0, 6.0], dtype=pl.Float64)2117assert_series_equal(result_add1, expected_add) # type: ignore[arg-type]2118assert_series_equal(result_add2, expected_add)21192120result_sub1 = cast("pl.Series", y - sx) # py37 is different vs 
py311 on this one2121expected = pl.Series([2.0, 2.0], dtype=pl.Float64)2122assert_series_equal(result_sub1, expected)2123result_sub2 = sx - y2124expected = pl.Series([-2.0, -2.0], dtype=pl.Float64)2125assert_series_equal(result_sub2, expected)21262127result_mul1 = y * sx2128result_mul2 = sx * y2129expected = pl.Series([3.0, 8.0], dtype=pl.Float64)2130assert_series_equal(result_mul1, expected) # type: ignore[arg-type]2131assert_series_equal(result_mul2, expected)21322133result_div1 = y / sx2134expected = pl.Series([3.0, 2.0], dtype=pl.Float64)2135assert_series_equal(result_div1, expected) # type: ignore[arg-type]2136result_div2 = sx / y2137expected = pl.Series([1 / 3, 0.5], dtype=pl.Float64)2138assert_series_equal(result_div2, expected)21392140result_pow1 = y**sx2141expected = pl.Series([3.0, 16.0], dtype=pl.Float64)2142assert_series_equal(result_pow1, expected) # type: ignore[arg-type]2143result_pow2 = sx**y2144expected = pl.Series([1.0, 16.0], dtype=pl.Float64)2145assert_series_equal(result_pow2, expected) # type: ignore[arg-type]214621472148def test_from_epoch_seq_input() -> None:2149seq_input = [1147880044]2150expected = pl.Series([datetime(2006, 5, 17, 15, 34, 4)])2151result = pl.from_epoch(seq_input)2152assert_series_equal(result, expected)215321542155def test_symmetry_for_max_in_names() -> None:2156# int2157a = pl.Series("a", [1])2158assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]2159# float2160a = pl.Series("a", [1.0])2161assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]2162# duration2163a = pl.Series("a", [1], dtype=pl.Duration("ns"))2164assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]2165# datetime2166a = pl.Series("a", [1], dtype=pl.Datetime("ns"))2167assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]21682169# TODO: time arithmetic support?2170# a = pl.Series("a", [1], dtype=pl.Time)2171# assert (a - a.max()).name 
== (a.max() - a).name == a.name217221732174def test_series_getitem_out_of_bounds_positive() -> None:2175s = pl.Series([1, 2])2176with pytest.raises(2177IndexError, match="index 10 is out of bounds for sequence of length 2"2178):2179s[10]218021812182def test_series_getitem_out_of_bounds_negative() -> None:2183s = pl.Series([1, 2])2184with pytest.raises(2185IndexError, match="index -10 is out of bounds for sequence of length 2"2186):2187s[-10]218821892190def test_series_cmp_fast_paths() -> None:2191assert (2192pl.Series([None], dtype=pl.Int32) != pl.Series([1, 2], dtype=pl.Int32)2193).to_list() == [None, None]2194assert (2195pl.Series([None], dtype=pl.Int32) == pl.Series([1, 2], dtype=pl.Int32)2196).to_list() == [None, None]21972198assert (2199pl.Series([None], dtype=pl.String) != pl.Series(["a", "b"], dtype=pl.String)2200).to_list() == [None, None]2201assert (2202pl.Series([None], dtype=pl.String) == pl.Series(["a", "b"], dtype=pl.String)2203).to_list() == [None, None]22042205assert (2206pl.Series([None], dtype=pl.Boolean)2207!= pl.Series([True, False], dtype=pl.Boolean)2208).to_list() == [None, None]2209assert (2210pl.Series([None], dtype=pl.Boolean)2211== pl.Series([False, False], dtype=pl.Boolean)2212).to_list() == [None, None]221322142215def test_comp_series_with_str_13123() -> None:2216s = pl.Series(["1", "2", None])2217assert_series_equal(s != "1", pl.Series([False, True, None]))2218assert_series_equal(s == "1", pl.Series([True, False, None]))2219assert_series_equal(s.eq_missing("1"), pl.Series([True, False, False]))2220assert_series_equal(s.ne_missing("1"), pl.Series([False, True, True]))222122222223@pytest.mark.parametrize(2224("data", "single", "multiple", "single_expected", "multiple_expected"),2225[2226([1, 2, 3], 1, [2, 4], 0, [1, 3]),2227(["a", "b", "c"], "d", ["a", "d"], 3, [0, 3]),2228([b"a", b"b", b"c"], b"d", [b"a", b"d"], 3, [0, 3]),2229(2230[date(2022, 1, 2), date(2023, 4, 1)],2231date(2022, 1, 1),2232[date(1999, 10, 1), date(2024, 1, 
1)],22330,2234[0, 2],2235),2236([1, 2, 3], 1, np.array([2, 4]), 0, [1, 3]), # test np array.2237],2238)2239def test_search_sorted(2240data: list[Any],2241single: Any,2242multiple: list[Any],2243single_expected: Any,2244multiple_expected: list[Any],2245) -> None:2246s = pl.Series(data)2247single_s = s.search_sorted(single)2248assert single_s == single_expected22492250multiple_s = s.search_sorted(multiple)2251assert_series_equal(2252multiple_s, pl.Series(multiple_expected, dtype=pl.get_index_type())2253)225422552256def test_series_from_pandas_with_dtype() -> None:2257expected = pl.Series("foo", [1, 2, 3], dtype=pl.Int8)2258s = pl.Series("foo", pd.Series([1, 2, 3]), pl.Int8)2259assert_series_equal(s, expected)2260s = pl.Series("foo", pd.Series([1, 2, 3], dtype="Int16"), pl.Int8)2261assert_series_equal(s, expected)22622263with pytest.raises(InvalidOperationError, match="conversion from"):2264pl.Series("foo", pd.Series([-1, 2, 3]), pl.UInt8)2265s = pl.Series("foo", pd.Series([-1, 2, 3]), pl.UInt8, strict=False)2266assert s.to_list() == [None, 2, 3]2267assert s.dtype == pl.UInt822682269with pytest.raises(InvalidOperationError, match="conversion from"):2270pl.Series("foo", pd.Series([-1, 2, 3], dtype="Int8"), pl.UInt8)2271s = pl.Series("foo", pd.Series([-1, 2, 3], dtype="Int8"), pl.UInt8, strict=False)2272assert s.to_list() == [None, 2, 3]2273assert s.dtype == pl.UInt8227422752276def test_series_from_pyarrow_with_dtype() -> None:2277s = pl.Series("foo", pa.array([-1, 2, 3]), pl.Int8)2278assert_series_equal(s, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))22792280with pytest.raises(InvalidOperationError, match="conversion from"):2281pl.Series("foo", pa.array([-1, 2, 3]), pl.UInt8)22822283s = pl.Series("foo", pa.array([-1, 2, 3]), dtype=pl.UInt8, strict=False)2284assert s.to_list() == [None, 2, 3]2285assert s.dtype == pl.UInt8228622872288def test_series_from_numpy_with_dtype() -> None:2289s = pl.Series("foo", np.array([-1, 2, 3]), pl.Int8)2290assert_series_equal(s, 
pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))22912292with pytest.raises(InvalidOperationError, match="conversion from"):2293pl.Series("foo", np.array([-1, 2, 3]), pl.UInt8)22942295s = pl.Series("foo", np.array([-1, 2, 3]), dtype=pl.UInt8, strict=False)2296assert s.to_list() == [None, 2, 3]2297assert s.dtype == pl.UInt8229822992300def test_raise_invalid_is_between() -> None:2301with pytest.raises(pl.exceptions.InvalidOperationError):2302pl.select(pl.lit(2).is_between(pl.lit("11"), pl.lit("33")))230323042305def test_construction_large_nested_u64_17231() -> None:2306import polars as pl23072308values = [{"f0": [9223372036854775808]}]2309dtype = pl.Struct({"f0": pl.List(pl.UInt64)})2310assert pl.Series(values, dtype=dtype).to_list() == values231123122313def test_repeat_by() -> None:2314calculated = pl.select(a=pl.Series("a", [1, 2]).repeat_by(2))2315expected = pl.select(a=pl.Series("a", [[1, 1], [2, 2]]))2316assert calculated.equals(expected)231723182319def test_is_close() -> None:2320a = pl.Series(2321"a",2322[23231.0,23241.0,2325float("-inf"),2326float("inf"),2327float("inf"),2328float("inf"),2329float("nan"),2330],2331)2332b = pl.Series(2333"b", [1.3, 1.7, float("-inf"), float("inf"), float("-inf"), 1.0, float("nan")]2334)2335assert a.is_close(b, abs_tol=0.5).to_list() == [2336True,2337False,2338True,2339True,2340False,2341False,2342False,2343]234423452346def test_is_close_literal() -> None:2347a = pl.Series("a", [1.1, 1.2, 1.3, 1.4, float("inf"), float("nan")])2348assert a.is_close(1.2).to_list() == [False, True, False, False, False, False]234923502351def test_is_close_nans_equal() -> None:2352a = pl.Series("a", [1.0, float("nan")])2353b = pl.Series("b", [2.0, float("nan")])2354assert a.is_close(b, nans_equal=True).to_list() == [False, True]235523562357def test_is_close_invalid_abs_tol() -> None:2358with pytest.raises(pl.exceptions.ComputeError):2359pl.select(pl.lit(1.0).is_close(1, abs_tol=-1.0))236023612362def test_is_close_invalid_rel_tol() -> None:2363with 
pytest.raises(pl.exceptions.ComputeError):2364pl.select(pl.lit(1.0).is_close(1, rel_tol=-1.0))236523662367def test_comparisons_structs_raise() -> None:2368s = pl.Series([{"x": 1}, {"x": 2}, {"x": 3}])2369rhss = ["", " ", 5, {"x": 1}]2370for rhs in rhss:2371with pytest.raises(2372NotImplementedError,2373match=r"Series of type Struct\(\{'x': Int64\}\) does not have eq operator",2374):2375s == rhs # noqa: B015237623772378