Path: blob/main/py-polars/tests/unit/series/test_series.py
6939 views
from __future__ import annotations12import math3from datetime import date, datetime, time, timedelta4from typing import TYPE_CHECKING, Any, cast5from zoneinfo import ZoneInfo67import numpy as np8import pandas as pd9import pyarrow as pa10import pytest1112import polars as pl13from polars._utils.construction import iterable_to_pyseries14from polars.datatypes import (15Datetime,16Field,17Float64,18Int32,19Int64,20Time,21UInt32,22UInt64,23Unknown,24)25from polars.exceptions import (26DuplicateError,27InvalidOperationError,28PolarsInefficientMapWarning,29ShapeError,30)31from polars.testing import assert_frame_equal, assert_series_equal32from tests.unit.conftest import FLOAT_DTYPES, INTEGER_DTYPES33from tests.unit.utils.pycapsule_utils import PyCapsuleStreamHolder3435if TYPE_CHECKING:36from collections.abc import Iterator3738from polars._typing import EpochTimeUnit, PolarsDataType, TimeUnit394041def test_cum_agg() -> None:42# confirm that known series give expected results43s = pl.Series("a", [1, 2, 3, 2])44assert_series_equal(s.cum_sum(), pl.Series("a", [1, 3, 6, 8]))45assert_series_equal(s.cum_min(), pl.Series("a", [1, 1, 1, 1]))46assert_series_equal(s.cum_max(), pl.Series("a", [1, 2, 3, 3]))47assert_series_equal(s.cum_prod(), pl.Series("a", [1, 2, 6, 12]))484950def test_cum_agg_with_nulls() -> None:51# confirm that known series give expected results52s = pl.Series("a", [None, 2, None, 7, 8, None])53assert_series_equal(s.cum_sum(), pl.Series("a", [None, 2, None, 9, 17, None]))54assert_series_equal(s.cum_min(), pl.Series("a", [None, 2, None, 2, 2, None]))55assert_series_equal(s.cum_max(), pl.Series("a", [None, 2, None, 7, 8, None]))56assert_series_equal(s.cum_prod(), pl.Series("a", [None, 2, None, 14, 112, None]))575859def test_cum_agg_with_infs() -> None:60# confirm that inf values are handled correctly61s = pl.Series([float("inf"), 0.0, 1.0])62assert_series_equal(s.cum_min(), pl.Series([float("inf"), 0.0, 0.0]))6364s = pl.Series([float("-inf"), 0.0, 1.0])65assert_series_equal(s.cum_max(), pl.Series([float("-inf"), 0.0, 1.0]))666768def test_cum_min_max_bool() -> None:69s = pl.Series("a", [None, True, True, None, False, None, True, False, False, None])70assert_series_equal(s.cum_min().cast(pl.Int32), s.cast(pl.Int32).cum_min())71assert_series_equal(s.cum_max().cast(pl.Int32), s.cast(pl.Int32).cum_max())72assert_series_equal(73s.cum_min(reverse=True).cast(pl.Int32), s.cast(pl.Int32).cum_min(reverse=True)74)75assert_series_equal(76s.cum_max(reverse=True).cast(pl.Int32), s.cast(pl.Int32).cum_max(reverse=True)77)787980def test_init_inputs(monkeypatch: Any) -> None:81nan = float("nan")82# Good inputs83pl.Series("a", [1, 2])84pl.Series("a", values=[1, 2])85pl.Series(name="a", values=[1, 2])86pl.Series(values=[1, 2], name="a")8788assert pl.Series([1, 2]).dtype == pl.Int6489assert pl.Series(values=[1, 2]).dtype == pl.Int6490assert pl.Series("a").dtype == pl.Null # Null dtype used in case of no data91assert pl.Series().dtype == pl.Null92assert pl.Series([]).dtype == pl.Null93assert (94pl.Series([None, None, None]).dtype == pl.Null95) # f32 type used for list with only None96assert pl.Series(values=[True, False]).dtype == pl.Boolean97assert pl.Series(values=np.array([True, False])).dtype == pl.Boolean98assert pl.Series(values=np.array(["foo", "bar"])).dtype == pl.String99assert pl.Series(values=["foo", "bar"]).dtype == pl.String100assert pl.Series("a", [pl.Series([1, 2, 4]), pl.Series([3, 2, 1])]).dtype == pl.List101assert pl.Series("a", [10000, 20000, 30000], dtype=pl.Time).dtype == pl.Time102103# 2d numpy array and/or list of 1d numpy arrays104for res in (105pl.Series(106name="a",107values=np.array([[1, 2], [3, nan]], dtype=np.float32),108nan_to_null=True,109),110pl.Series(111name="a",112values=[113np.array([1, 2], dtype=np.float32),114np.array([3, nan], dtype=np.float32),115],116nan_to_null=True,117),118pl.Series(119name="a",120values=(121np.ndarray((2,), np.float32, np.array([1, 2], dtype=np.float32)),122np.ndarray((2,), np.float32, np.array([3, nan], dtype=np.float32)),123),124nan_to_null=True,125),126):127assert res.dtype == pl.Array(pl.Float32, shape=2)128assert res[0].to_list() == [1.0, 2.0]129assert res[1].to_list() == [3.0, None]130131# numpy from arange, with/without dtype132two_ints = np.arange(2, dtype=np.int64)133three_ints = np.arange(3, dtype=np.int64)134for res in (135pl.Series("a", [two_ints, three_ints]),136pl.Series("a", [two_ints, three_ints], dtype=pl.List(pl.Int64)),137):138assert res.dtype == pl.List(pl.Int64)139assert res.to_list() == [[0, 1], [0, 1, 2]]140141assert pl.Series(142values=np.array([["foo", "bar"], ["foo2", "bar2"]])143).dtype == pl.Array(pl.String, shape=2)144145# lists146assert pl.Series("a", [[1, 2], [3, 4]]).dtype == pl.List(pl.Int64)147148# conversion of Date to Datetime149s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime)150assert s.to_list() == [datetime(2023, 1, 1), datetime(2023, 1, 2)]151assert Datetime == s.dtype152assert s.dtype.time_unit == "us" # type: ignore[attr-defined]153assert s.dtype.time_zone is None # type: ignore[attr-defined]154155# conversion of Date to Datetime with specified timezone and units156tu: TimeUnit = "ms"157tz = "America/Argentina/Rio_Gallegos"158s = pl.Series(159[date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime(tu)160).dt.replace_time_zone(tz)161d1 = datetime(2023, 1, 1, 0, 0, 0, 0, ZoneInfo(tz))162d2 = datetime(2023, 1, 2, 0, 0, 0, 0, ZoneInfo(tz))163assert s.to_list() == [d1, d2]164assert Datetime == s.dtype165assert s.dtype.time_unit == tu # type: ignore[attr-defined]166assert s.dtype.time_zone == tz # type: ignore[attr-defined]167168# datetime64: check timeunit (auto-detect, implicit/explicit) and NaT169d64 = pd.date_range(date(2021, 8, 1), date(2021, 8, 3)).values170d64[1] = None171172expected = [datetime(2021, 8, 1, 0), None, datetime(2021, 8, 3, 0)]173for dtype in (None, Datetime, Datetime("ns")):174s = pl.Series("dates", d64, dtype)175assert s.to_list() == expected176assert Datetime == s.dtype177assert s.dtype.time_unit == "ns" # type: ignore[attr-defined]178179s = pl.Series(values=d64.astype("<M8[ms]"))180assert s.dtype.time_unit == "ms" # type: ignore[attr-defined]181assert expected == s.to_list()182183# pandas184assert pl.Series(pd.Series([1, 2])).dtype == pl.Int64185186# Bad inputs187with pytest.raises(TypeError):188pl.Series([1, 2, 3], [1, 2, 3])189with pytest.raises(TypeError):190pl.Series({"a": [1, 2, 3]})191with pytest.raises(OverflowError):192pl.Series("bigint", [2**128])193194# numpy not available195monkeypatch.setattr(pl.series.series, "_check_for_numpy", lambda x: False)196with pytest.raises(TypeError):197pl.DataFrame(np.array([1, 2, 3]), schema=["a"])198199200def test_init_structured_objects() -> None:201# validate init from dataclass, namedtuple, and pydantic model objects202from typing import NamedTuple203204from polars.dependencies import dataclasses, pydantic205206@dataclasses.dataclass207class TeaShipmentDC:208exporter: str209importer: str210product: str211tonnes: int | None212213class TeaShipmentNT(NamedTuple):214exporter: str215importer: str216product: str217tonnes: None | int218219class TeaShipmentPD(pydantic.BaseModel):220exporter: str221importer: str222product: str223tonnes: int224225for Tea in (TeaShipmentDC, TeaShipmentNT, TeaShipmentPD):226t0 = Tea(exporter="Sri Lanka", importer="USA", product="Ceylon", tonnes=10)227t1 = Tea(exporter="India", importer="UK", product="Darjeeling", tonnes=25)228t2 = Tea(exporter="China", importer="UK", product="Keemum", tonnes=40)229230s = pl.Series("t", [t0, t1, t2])231232assert isinstance(s, pl.Series)233assert s.dtype.fields == [ # type: ignore[attr-defined]234Field("exporter", pl.String),235Field("importer", pl.String),236Field("product", pl.String),237Field("tonnes", pl.Int64),238]239assert s.to_list() == [240{241"exporter": "Sri Lanka",242"importer": "USA",243"product": "Ceylon",244"tonnes": 10,245},246{247"exporter": "India",248"importer": "UK",249"product": "Darjeeling",250"tonnes": 25,251},252{253"exporter": "China",254"importer": "UK",255"product": "Keemum",256"tonnes": 40,257},258]259assert_frame_equal(s.to_frame(), pl.DataFrame({"t": [t0, t1, t2]}))260261262def test_to_frame() -> None:263s1 = pl.Series([1, 2])264s2 = pl.Series("s", [1, 2])265266df1 = s1.to_frame()267df2 = s2.to_frame()268df3 = s1.to_frame("xyz")269df4 = s2.to_frame("xyz")270271for df, name in ((df1, ""), (df2, "s"), (df3, "xyz"), (df4, "xyz")):272assert isinstance(df, pl.DataFrame)273assert df.rows() == [(1,), (2,)]274assert df.columns == [name]275276# note: the empty string IS technically a valid column name277assert s2.to_frame("").columns == [""]278assert s2.name == "s"279280281def test_bitwise_ops() -> None:282a = pl.Series([True, False, True])283b = pl.Series([False, True, True])284assert_series_equal((a & b), pl.Series([False, False, True]))285assert_series_equal((a | b), pl.Series([True, True, True]))286assert_series_equal((a ^ b), pl.Series([True, True, False]))287assert_series_equal((~a), pl.Series([False, True, False]))288289# rand/rxor/ror we trigger by casting the left hand to a list here in the test290# Note that the type annotations only allow Series to be passed in, but there is291# specific code to deal with non-Series inputs.292assert_series_equal(293(True & a),294pl.Series([True, False, True]),295)296assert_series_equal(297(True | a),298pl.Series([True, True, True]),299)300assert_series_equal(301(True ^ a),302pl.Series([False, True, False]),303)304305306def test_bitwise_floats_invert() -> None:307s = pl.Series([2.0, 3.0, 0.0])308309with pytest.raises(InvalidOperationError):310~s311312313def test_equality() -> None:314a = pl.Series("a", [1, 2])315b = a316317cmp = a == b318assert isinstance(cmp, pl.Series)319assert cmp.sum() == 2320assert (a != b).sum() == 0321assert (a >= b).sum() == 2322assert (a <= b).sum() == 2323assert (a > b).sum() == 0324assert (a < b).sum() == 0325assert a.sum() == 3326assert_series_equal(a, b)327328a = pl.Series("name", ["ham", "foo", "bar"])329assert_series_equal((a == "ham"), pl.Series("name", [True, False, False]))330331a = pl.Series("name", [[1], [1, 2], [2, 3]])332assert_series_equal((a == [1]), pl.Series("name", [True, False, False]))333334335def test_agg() -> None:336series = pl.Series("a", [1, 2])337assert series.mean() == 1.5338assert series.min() == 1339assert series.max() == 2340341342def test_date_agg() -> None:343series = pl.Series(344[345date(2022, 8, 2),346date(2096, 8, 1),347date(9009, 9, 9),348],349dtype=pl.Date,350)351assert series.min() == date(2022, 8, 2)352assert series.max() == date(9009, 9, 9)353354355@pytest.mark.parametrize(356("s", "min", "max"),357[358(pl.Series(["c", "b", "a"], dtype=pl.Categorical("lexical")), "a", "c"),359(pl.Series([None, "a", "c", "b"], dtype=pl.Categorical("lexical")), "a", "c"),360(pl.Series([], dtype=pl.Categorical("lexical")), None, None),361(pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a"])), "c", "a"),362(pl.Series(["c", "b", "a"], dtype=pl.Enum(["c", "b", "a", "d"])), "c", "a"),363],364)365def test_categorical_agg(s: pl.Series, min: str | None, max: str | None) -> None:366assert s.min() == min367assert s.max() == max368369370def test_add_string() -> None:371s = pl.Series(["hello", "weird"])372result = s + " world"373print(result)374assert_series_equal(result, pl.Series(["hello world", "weird world"]))375376result = "pfx:" + s377assert_series_equal(result, pl.Series("literal", ["pfx:hello", "pfx:weird"]))378379380@pytest.mark.parametrize(381("data", "expected_dtype"),382[383(100, pl.Int64),384(8.5, pl.Float64),385("서울특별시", pl.String),386(date.today(), pl.Date),387(datetime.now(), pl.Datetime("us")),388(time(23, 59, 59), pl.Time),389(timedelta(hours=7, seconds=123), pl.Duration("us")),390],391)392def test_unknown_dtype(data: Any, expected_dtype: PolarsDataType) -> None:393# if given 'Unknown', should be able to infer the correct dtype394s = pl.Series([data], dtype=Unknown)395assert s.dtype == expected_dtype396assert s.to_list() == [data]397398399def test_various() -> None:400a = pl.Series("a", [1, 2])401assert a.is_null().sum() == 0402assert a.name == "a"403404a = a.rename("b")405assert a.name == "b"406assert a.len() == 2407assert len(a) == 2408409a.append(a.clone())410assert_series_equal(a, pl.Series("b", [1, 2, 1, 2]))411412a = pl.Series("a", range(20))413assert a.head(5).len() == 5414assert a.tail(5).len() == 5415assert (a.head(5) != a.tail(5)).all()416417a = pl.Series("a", [2, 1, 4])418a.sort(in_place=True)419assert_series_equal(a, pl.Series("a", [1, 2, 4]))420a = pl.Series("a", [2, 1, 1, 4, 4, 4])421assert_series_equal(a.arg_unique(), pl.Series("a", [0, 1, 3], dtype=UInt32))422423assert_series_equal(a.gather([2, 3]), pl.Series("a", [1, 4]))424425426def test_series_dtype_is() -> None:427s = pl.Series("s", [1, 2, 3])428429assert s.dtype.is_numeric()430assert s.dtype.is_integer()431assert s.dtype.is_signed_integer()432assert not s.dtype.is_unsigned_integer()433assert (s * 0.99).dtype.is_float()434435s = pl.Series("s", [1, 2, 3], dtype=pl.UInt8)436assert s.dtype.is_numeric()437assert s.dtype.is_integer()438assert not s.dtype.is_signed_integer()439assert s.dtype.is_unsigned_integer()440441s = pl.Series("bool", [True, None, False])442assert not s.dtype.is_numeric()443444s = pl.Series("s", ["testing..."])445assert s.dtype == pl.String446assert s.dtype != pl.Boolean447448s = pl.Series("s", [], dtype=pl.Decimal(20, 15))449assert not s.dtype.is_float()450assert s.dtype.is_numeric()451assert s.is_empty()452453s = pl.Series("s", [], dtype=pl.Datetime("ms", time_zone="UTC"))454assert s.dtype.is_temporal()455456457def test_series_head_tail_limit() -> None:458s = pl.Series(range(10))459460assert_series_equal(s.head(5), pl.Series(range(5)))461assert_series_equal(s.limit(5), s.head(5))462assert_series_equal(s.tail(5), pl.Series(range(5, 10)))463464# check if it doesn't fail when out of bounds465assert s.head(100).len() == 10466assert s.limit(100).len() == 10467assert s.tail(100).len() == 10468469# negative values470assert_series_equal(s.head(-7), pl.Series(range(3)))471assert s.head(-2).len() == 8472assert_series_equal(s.tail(-8), pl.Series(range(8, 10)))473assert s.head(-6).len() == 4474475# negative values out of bounds476assert s.head(-12).len() == 0477assert s.limit(-12).len() == 0478assert s.tail(-12).len() == 0479480481def test_filter_ops() -> None:482a = pl.Series("a", range(20))483assert a.filter(a > 1).len() == 18484assert a.filter(a < 1).len() == 1485assert a.filter(a <= 1).len() == 2486assert a.filter(a >= 1).len() == 19487assert a.filter(a == 1).len() == 1488assert a.filter(a != 1).len() == 19489490491def test_cast() -> None:492a = pl.Series("a", range(20))493494assert a.cast(pl.Float32).dtype == pl.Float32495assert a.cast(pl.Float64).dtype == pl.Float64496assert a.cast(pl.Int32).dtype == pl.Int32497assert a.cast(pl.UInt32).dtype == pl.UInt32498assert a.cast(pl.Datetime).dtype == pl.Datetime499assert a.cast(pl.Date).dtype == pl.Date500501# display failed values, GH#4706502with pytest.raises(InvalidOperationError, match="foobar"):503pl.Series(["1", "2", "3", "4", "foobar"]).cast(int)504505506@pytest.mark.parametrize(507"test_data",508[509[1, None, 2],510["abc", None, "xyz"],511[None, datetime.now()],512[[1, 2], [3, 4], None],513],514)515def test_to_pandas(test_data: list[Any]) -> None:516a = pl.Series("s", test_data)517b = a.to_pandas()518519assert a.name == b.name520assert b.isnull().sum() == 1521522vals_b: list[Any]523if a.dtype == pl.List:524vals_b = [(None if x is None else x.tolist()) for x in b]525else:526v = b.replace({np.nan: None}).values.tolist()527vals_b = cast("list[Any]", v)528529assert vals_b == test_data530531try:532c = a.to_pandas(use_pyarrow_extension_array=True)533assert a.name == c.name534assert c.isnull().sum() == 1535vals_c = [None if x is pd.NA else x for x in c.tolist()]536assert vals_c == test_data537except ModuleNotFoundError:538# Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.539pass540541542def test_series_to_list() -> None:543s = pl.Series("a", range(20))544result = s.to_list()545assert isinstance(result, list)546assert len(result) == 20547548a = pl.Series("a", [1, None, 2])549assert a.null_count() == 1550assert a.to_list() == [1, None, 2]551552553@pytest.mark.may_fail_cloud # reason: list.to_struct is a eager operation554def test_to_struct() -> None:555s = pl.Series("nums", ["12 34", "56 78", "90 00"]).str.extract_all(r"\d+")556557assert s.list.to_struct().struct.fields == ["field_0", "field_1"]558assert s.list.to_struct(fields=lambda idx: f"n{idx:02}").struct.fields == [559"n00",560"n01",561]562assert_frame_equal(563s.list.to_struct(fields=["one", "two"]).struct.unnest(),564pl.DataFrame({"one": ["12", "56", "90"], "two": ["34", "78", "00"]}),565)566567568def test_to_struct_empty() -> None:569df = pl.DataFrame({"y": [[], [], []]}, schema={"y": pl.List(pl.Int64)})570empty_df = df.select(pl.col("y").list.to_struct(fields=[]).struct.unnest())571assert empty_df.shape == (0, 0)572573574def test_sort() -> None:575a = pl.Series("a", [2, 1, 3])576assert_series_equal(a.sort(), pl.Series("a", [1, 2, 3]))577assert_series_equal(a.sort(descending=True), pl.Series("a", [3, 2, 1]))578579580def test_rechunk() -> None:581a = pl.Series("a", [1, 2, 3])582b = pl.Series("b", [4, 5, 6])583a.append(b)584assert a.n_chunks() == 2585assert a.rechunk(in_place=False).n_chunks() == 1586a.rechunk(in_place=True)587assert a.n_chunks() == 1588589590def test_indexing() -> None:591a = pl.Series("a", [1, 2, None])592assert a[1] == 2593assert a[2] is None594b = pl.Series("b", [True, False])595assert b[0]596assert not b[1]597a = pl.Series("a", ["a", None])598assert a[0] == "a"599assert a[1] is None600a = pl.Series("a", [0.1, None])601assert a[0] == 0.1602assert a[1] is None603604605def test_arrow() -> None:606a = pl.Series("a", [1, 2, 3, None])607out = a.to_arrow()608assert out == pa.array([1, 2, 3, None])609610b = pl.Series("b", [1.0, 2.0, 3.0, None])611out = b.to_arrow()612assert out == pa.array([1.0, 2.0, 3.0, None])613614c = pl.Series("c", ["A", "BB", "CCC", None])615out = c.to_arrow()616assert out == pa.array(["A", "BB", "CCC", None], type=pa.large_string())617assert_series_equal(pl.from_arrow(out), c.rename("")) # type: ignore[arg-type]618619out = c.to_frame().to_arrow()["c"]620assert isinstance(out, (pa.Array, pa.ChunkedArray))621assert_series_equal(pl.from_arrow(out), c) # type: ignore[arg-type]622assert_series_equal(pl.from_arrow(out, schema=["x"]), c.rename("x")) # type: ignore[arg-type]623624d = pl.Series("d", [None, None, None], pl.Null)625out = d.to_arrow()626assert out == pa.nulls(3)627628s = cast(629"pl.Series",630pl.from_arrow(pa.array([["foo"], ["foo", "bar"]], pa.list_(pa.utf8()))),631)632assert s.dtype == pl.List633634635def test_arrow_cat() -> None:636# categorical dtype tests (including various forms of empty pyarrow array)637arr0 = pa.array(["foo", "bar"], pa.dictionary(pa.int32(), pa.utf8()))638assert_series_equal(639pl.Series("arr", ["foo", "bar"], pl.Categorical), pl.Series("arr", arr0)640)641arr1 = pa.array(["xxx", "xxx", None, "yyy"]).dictionary_encode()642arr2 = pa.chunked_array([], arr1.type)643arr3 = pa.array([], arr1.type)644arr4 = pa.array([]).dictionary_encode()645646assert_series_equal(647pl.Series("arr", ["xxx", "xxx", None, "yyy"], dtype=pl.Categorical),648pl.Series("arr", arr1),649)650for arr in (arr2, arr3):651assert_series_equal(652pl.Series("arr", [], dtype=pl.Categorical), pl.Series("arr", arr)653)654assert_series_equal(pl.Series("arr", [], dtype=pl.Null), pl.Series("arr", arr4))655656657def test_pycapsule_interface() -> None:658a = pl.Series("a", [1, 2, 3, None])659out = pa.chunked_array(PyCapsuleStreamHolder(a))660out_arr = out.combine_chunks()661assert out_arr == pa.array([1, 2, 3, None])662663664def test_get() -> None:665a = pl.Series("a", [1, 2, 3])666pos_idxs = pl.Series("idxs", [2, 0, 1, 0], dtype=pl.Int8)667neg_and_pos_idxs = pl.Series(668"neg_and_pos_idxs", [-2, 1, 0, -1, 2, -3], dtype=pl.Int8669)670empty_idxs = pl.Series("idxs", [], dtype=pl.Int8)671empty_ints: list[int] = []672assert a[0] == 1673assert a[:2].to_list() == [1, 2]674assert a[range(1)].to_list() == [1]675assert a[range(0, 4, 2)].to_list() == [1, 3]676assert a[:0].to_list() == []677assert a[empty_ints].to_list() == []678assert a[neg_and_pos_idxs.to_list()].to_list() == [2, 2, 1, 3, 3, 1]679for dtype in (680pl.UInt8,681pl.UInt16,682pl.UInt32,683pl.UInt64,684pl.Int8,685pl.Int16,686pl.Int32,687pl.Int64,688):689assert a[pos_idxs.cast(dtype)].to_list() == [3, 1, 2, 1]690assert a[pos_idxs.cast(dtype).to_numpy()].to_list() == [3, 1, 2, 1]691assert a[empty_idxs.cast(dtype)].to_list() == []692assert a[empty_idxs.cast(dtype).to_numpy()].to_list() == []693694for dtype in (pl.Int8, pl.Int16, pl.Int32, pl.Int64):695nps = a[neg_and_pos_idxs.cast(dtype).to_numpy()]696assert nps.to_list() == [2, 2, 1, 3, 3, 1]697698699def test_set() -> None:700a = pl.Series("a", [True, False, True])701mask = pl.Series("msk", [True, False, True])702a[mask] = False703assert_series_equal(a, pl.Series("a", [False] * 3))704705706def test_set_value_as_list_fail() -> None:707# only allowed for numerical physical types708s = pl.Series("a", [1, 2, 3])709s[[0, 2]] = [4, 5]710assert s.to_list() == [4, 2, 5]711712# for other types it is not allowed713s = pl.Series("a", ["a", "b", "c"])714with pytest.raises(TypeError):715s[[0, 1]] = ["d", "e"]716717s = pl.Series("a", [True, False, False])718with pytest.raises(TypeError):719s[[0, 1]] = [True, False]720721722@pytest.mark.parametrize("key", [True, False, 1.0])723def test_set_invalid_key(key: Any) -> None:724s = pl.Series("a", [1, 2, 3])725with pytest.raises(TypeError):726s[key] = 1727728729@pytest.mark.parametrize(730"key",731[732pl.Series([False, True, True]),733pl.Series([1, 2], dtype=UInt32),734pl.Series([1, 2], dtype=UInt64),735],736)737def test_set_key_series(key: pl.Series) -> None:738"""Only UInt32/UInt64/bool are allowed."""739s = pl.Series("a", [1, 2, 3])740s[key] = 4741assert_series_equal(s, pl.Series("a", [1, 4, 4]))742743744def test_set_np_array_boolean_mask() -> None:745a = pl.Series("a", [1, 2, 3])746mask = np.array([True, False, True])747a[mask] = 4748assert_series_equal(a, pl.Series("a", [4, 2, 4]))749750751@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.uint32, np.uint64])752def test_set_np_array(dtype: Any) -> None:753a = pl.Series("a", [1, 2, 3])754idx = np.array([0, 2], dtype=dtype)755a[idx] = 4756assert_series_equal(a, pl.Series("a", [4, 2, 4]))757758759@pytest.mark.parametrize("idx", [[0, 2], (0, 2)])760def test_set_list_and_tuple(idx: list[int] | tuple[int]) -> None:761a = pl.Series("a", [1, 2, 3])762a[idx] = 4763assert_series_equal(a, pl.Series("a", [4, 2, 4]))764765766def test_init_nested_tuple() -> None:767s1 = pl.Series("s", (1, 2, 3))768assert s1.to_list() == [1, 2, 3]769770s2 = pl.Series("s", ((1, 2, 3),), dtype=pl.List(pl.UInt8))771assert s2.to_list() == [[1, 2, 3]]772assert s2.dtype == pl.List(pl.UInt8)773774s3 = pl.Series("s", ((1, 2, 3), (1, 2, 3)), dtype=pl.List(pl.Int32))775assert s3.to_list() == [[1, 2, 3], [1, 2, 3]]776assert s3.dtype == pl.List(pl.Int32)777778779def test_fill_null() -> None:780s = pl.Series("a", [1, 2, None])781assert_series_equal(s.fill_null(strategy="forward"), pl.Series("a", [1, 2, 2]))782assert_series_equal(s.fill_null(14), pl.Series("a", [1, 2, 14], dtype=Int64))783784a = pl.Series("a", [0.0, 1.0, None, 2.0, None, 3.0])785786assert a.fill_null(0).to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]787assert a.fill_null(strategy="zero").to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]788assert a.fill_null(strategy="max").to_list() == [0.0, 1.0, 3.0, 2.0, 3.0, 3.0]789assert a.fill_null(strategy="min").to_list() == [0.0, 1.0, 0.0, 2.0, 0.0, 3.0]790assert a.fill_null(strategy="one").to_list() == [0.0, 1.0, 1.0, 2.0, 1.0, 3.0]791assert a.fill_null(strategy="forward").to_list() == [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]792assert a.fill_null(strategy="backward").to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]793assert a.fill_null(strategy="mean").to_list() == [0.0, 1.0, 1.5, 2.0, 1.5, 3.0]794assert a.forward_fill().to_list() == [0.0, 1.0, 1.0, 2.0, 2.0, 3.0]795assert a.backward_fill().to_list() == [0.0, 1.0, 2.0, 2.0, 3.0, 3.0]796797b = pl.Series("b", ["a", None, "c", None, "e"])798assert b.fill_null(strategy="min").to_list() == ["a", "a", "c", "a", "e"]799assert b.fill_null(strategy="max").to_list() == ["a", "e", "c", "e", "e"]800assert b.fill_null(strategy="zero").to_list() == ["a", "", "c", "", "e"]801assert b.fill_null(strategy="forward").to_list() == ["a", "a", "c", "c", "e"]802assert b.fill_null(strategy="backward").to_list() == ["a", "c", "c", "e", "e"]803804c = pl.Series("c", [b"a", None, b"c", None, b"e"])805assert c.fill_null(strategy="min").to_list() == [b"a", b"a", b"c", b"a", b"e"]806assert c.fill_null(strategy="max").to_list() == [b"a", b"e", b"c", b"e", b"e"]807assert c.fill_null(strategy="zero").to_list() == [b"a", b"", b"c", b"", b"e"]808assert c.fill_null(strategy="forward").to_list() == [b"a", b"a", b"c", b"c", b"e"]809assert c.fill_null(strategy="backward").to_list() == [b"a", b"c", b"c", b"e", b"e"]810811df = pl.DataFrame(812[813pl.Series("i32", [1, 2, None], dtype=pl.Int32),814pl.Series("i64", [1, 2, None], dtype=pl.Int64),815pl.Series("f32", [1, 2, None], dtype=pl.Float32),816pl.Series("cat", ["a", "b", None], dtype=pl.Categorical),817pl.Series("str", ["a", "b", None], dtype=pl.String),818pl.Series("bool", [True, True, None], dtype=pl.Boolean),819]820)821822assert df.fill_null(0, matches_supertype=False).fill_null("bar").fill_null(823False824).to_dict(as_series=False) == {825"i32": [1, 2, None],826"i64": [1, 2, 0],827"f32": [1.0, 2.0, None],828"cat": ["a", "b", "bar"],829"str": ["a", "b", "bar"],830"bool": [True, True, False],831}832833assert df.fill_null(0, matches_supertype=True).fill_null("bar").fill_null(834False835).to_dict(as_series=False) == {836"i32": [1, 2, 0],837"i64": [1, 2, 0],838"f32": [1.0, 2.0, 0.0],839"cat": ["a", "b", "bar"],840"str": ["a", "b", "bar"],841"bool": [True, True, False],842}843df = pl.DataFrame({"a": [1, None, 2, None]})844845out = df.with_columns(846pl.col("a").cast(pl.UInt8).alias("u8"),847pl.col("a").cast(pl.UInt16).alias("u16"),848pl.col("a").cast(pl.UInt32).alias("u32"),849pl.col("a").cast(pl.UInt64).alias("u64"),850).fill_null(3)851852assert out.to_dict(as_series=False) == {853"a": [1, 3, 2, 3],854"u8": [1, 3, 2, 3],855"u16": [1, 3, 2, 3],856"u32": [1, 3, 2, 3],857"u64": [1, 3, 2, 3],858}859assert out.dtypes == [pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64]860861862def test_str_series_min_max_10674() -> None:863str_series = pl.Series("b", ["a", None, "c", None, "e"], dtype=pl.String)864assert str_series.min() == "a"865assert str_series.max() == "e"866assert str_series.sort(descending=False).min() == "a"867assert str_series.sort(descending=True).max() == "e"868869870def test_fill_nan() -> None:871nan = float("nan")872a = pl.Series("a", [1.0, nan, 2.0, nan, 3.0])873assert_series_equal(a.fill_nan(None), pl.Series("a", [1.0, None, 2.0, None, 3.0]))874assert_series_equal(a.fill_nan(0), pl.Series("a", [1.0, 0.0, 2.0, 0.0, 3.0]))875876877def test_map_elements() -> None:878with pytest.warns(PolarsInefficientMapWarning):879a = pl.Series("a", [1, 2, None])880b = a.map_elements(lambda x: x**2, return_dtype=pl.Int64)881assert list(b) == [1, 4, None]882883with pytest.warns(PolarsInefficientMapWarning):884a = pl.Series("a", ["foo", "bar", None])885b = a.map_elements(lambda x: x + "py", return_dtype=pl.String)886assert list(b) == ["foopy", "barpy", None]887888b = a.map_elements(lambda x: len(x), return_dtype=pl.Int32)889assert list(b) == [3, 3, None]890891b = a.map_elements(lambda x: len(x))892assert list(b) == [3, 3, None]893894# just check that it runs (somehow problem with conditional compilation)895a = pl.Series("a", [2, 2, 3]).cast(pl.Datetime)896a.map_elements(lambda x: x)897a = pl.Series("a", [2, 2, 3]).cast(pl.Date)898a.map_elements(lambda x: x)899900901def test_shape() -> None:902s = pl.Series([1, 2, 3])903assert s.shape == (3,)904905906@pytest.mark.parametrize("arrow_available", [True, False])907def test_create_list_series(arrow_available: bool, monkeypatch: Any) -> None:908monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", arrow_available)909a = [[1, 2], None, [None, 3]]910s = pl.Series("", a)911assert s.to_list() == a912913914def test_iter() -> None:915s = pl.Series("", [1, 2, 3])916917itr = s.__iter__()918assert itr.__next__() == 1919assert itr.__next__() == 2920assert itr.__next__() == 3921assert sum(s) == 6922923924def test_empty() -> None:925a = pl.Series(dtype=pl.Int8)926assert a.dtype == pl.Int8927assert a.is_empty()928929a = pl.Series()930assert a.dtype == pl.Null931assert a.is_empty()932933a = pl.Series("name", [])934assert a.dtype == pl.Null935assert a.is_empty()936937a = pl.Series(values=(), dtype=pl.Int8)938assert a.dtype == pl.Int8939assert a.is_empty()940941assert_series_equal(pl.Series(), pl.Series())942assert_series_equal(943pl.Series(dtype=pl.Int32), pl.Series(dtype=pl.Int64), check_dtypes=False944)945946with pytest.raises(TypeError, match="ambiguous"):947not pl.Series()948949950def test_round() -> None:951a = pl.Series("f", [1.003, 2.003])952b = a.round(2)953assert b.to_list() == [1.00, 2.00]954955b = a.round()956assert b.to_list() == [1.0, 2.0]957958959def test_round_int() -> None:960s = pl.Series([1, 2, 3])961assert_series_equal(s, s.round())962963964@pytest.mark.parametrize(965("series", "digits", "expected_result"),966[967pytest.param(pl.Series([1.234, 0.1234]), 2, pl.Series([1.2, 0.12]), id="f64"),968pytest.param(969pl.Series([1.234, 0.1234]).cast(pl.Float32),9702,971pl.Series([1.2, 0.12]).cast(pl.Float32),972id="f32",973),974pytest.param(pl.Series([123400, 1234]), 2, pl.Series([120000, 1200]), id="i64"),975pytest.param(976pl.Series([123400, 1234]).cast(pl.Int32),9772,978pl.Series([120000, 1200]).cast(pl.Int32),979id="i32",980),981pytest.param(982pl.Series([0.0]), 2, pl.Series([0.0]), id="0 should remain the same"983),984],985)986def test_round_sig_figs(987series: pl.Series, digits: int, expected_result: pl.Series988) -> None:989result = series.round_sig_figs(digits=digits)990assert_series_equal(result, expected_result)991992993def test_round_sig_figs_raises_exc() -> None:994with pytest.raises(pl.exceptions.InvalidOperationError):995pl.Series([1.234, 0.1234]).round_sig_figs(digits=0)996997998def test_apply_list_out() -> None:999s = pl.Series("count", [3, 2, 2])1000out = s.map_elements(lambda val: pl.repeat(val, val, eager=True))1001assert out[0].to_list() == [3, 3, 3]1002assert out[1].to_list() == [2, 2]1003assert out[2].to_list() == [2, 2]100410051006def test_reinterpret() -> None:1007s = pl.Series("a", [1, 1, 2], dtype=pl.UInt64)1008assert s.reinterpret(signed=True).dtype == pl.Int641009df = pl.DataFrame([s])1010assert df.select([pl.col("a").reinterpret(signed=True)])["a"].dtype == pl.Int64101110121013def test_mode() -> None:1014s = pl.Series("a", [1, 1, 2])1015assert s.mode().to_list() == [1]1016assert s.set_sorted().mode().to_list() == [1]10171018df = pl.DataFrame([s])1019assert df.select([pl.col("a").mode()])["a"].to_list() == [1]1020assert (1021pl.Series(["foo", "bar", "buz", "bar"], dtype=pl.Categorical).mode().item()1022== "bar"1023)1024assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.01025assert pl.Series(["a", "b", "c", "b"]).mode().item() == "b"10261027# sorted data1028assert set(pl.int_range(0, 3, eager=True).mode().to_list()) == {0, 1, 2}102910301031def test_diff() -> None:1032s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])10331034assert_series_equal(1035s.diff(),1036pl.Series("a", [None, 1, 1, -1, 0, 1, -3]),1037)1038assert_series_equal(1039s.diff(null_behavior="drop"),1040pl.Series("a", [1, 1, -1, 0, 1, -3]),1041)104210431044def test_diff_negative() -> None:1045s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])10461047assert_series_equal(1048s.diff(-1),1049pl.Series("a", [-1, -1, 1, 0, -1, 3, None]),1050)1051assert_series_equal(1052s.diff(-1, null_behavior="drop"),1053pl.Series("a", [-1, -1, 1, 0, -1, 3]),1054)105510561057def test_pct_change() -> None:1058s = pl.Series("a", [1, 2, 4, 8, 16, 32, 64])1059expected = pl.Series("a", [None, None, 3.0, 3.0, 3.0, 3.0, 3.0])1060assert_series_equal(s.pct_change(2), expected)1061assert_series_equal(s.pct_change(pl.Series([2])), expected)1062# negative1063assert pl.Series(range(5)).pct_change(-1).to_list() == [1064-1.0,1065-0.5,1066-0.3333333333333333,1067-0.25,1068None,1069]107010711072def test_skew() -> None:1073s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])10741075assert s.skew(bias=True) == pytest.approx(-0.5953924651018018)1076assert s.skew(bias=False) == pytest.approx(-0.7717168360221258)10771078df = pl.DataFrame([s])1079assert np.isclose(1080df.select(pl.col("a").skew(bias=False))["a"][0], -0.77171683602212581081)108210831084def test_kurtosis() -> None:1085s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])1086expected = -0.640625000000000410871088assert s.kurtosis() == pytest.approx(expected)1089df = pl.DataFrame([s])1090assert np.isclose(df.select(pl.col("a").kurtosis())["a"][0], expected)109110921093def test_sqrt() -> None:1094s = pl.Series("a", [1, 2])1095assert_series_equal(s.sqrt(), pl.Series("a", [1.0, np.sqrt(2)]))1096df = pl.DataFrame([s])1097assert_series_equal(1098df.select(pl.col("a").sqrt())["a"], pl.Series("a", [1.0, np.sqrt(2)])1099)110011011102def test_cbrt() -> None:1103s = pl.Series("a", [1, 2])1104assert_series_equal(s.cbrt(), pl.Series("a", [1.0, np.cbrt(2)]))1105df = pl.DataFrame([s])1106assert_series_equal(1107df.select(pl.col("a").cbrt())["a"], pl.Series("a", [1.0, np.cbrt(2)])1108)110911101111def test_range() -> None:1112s1 = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])1113assert_series_equal(s1[2:5], s1[range(2, 5)])11141115ranges = [range(-2, 1), range(3), range(2, 8, 2)]11161117s2 = pl.Series("b", ranges, dtype=pl.List(pl.Int8))1118assert s2.to_list() == [[-2, -1, 0], [0, 1, 2], [2, 4, 6]]1119assert s2.dtype == pl.List(pl.Int8)1120assert s2.name == "b"11211122s3 = pl.Series("c", (ranges for _ in range(3)))1123assert s3.to_list() == [1124[[-2, -1, 0], [0, 1, 2], [2, 4, 6]],1125[[-2, -1, 0], [0, 1, 2], [2, 4, 6]],1126[[-2, -1, 0], [0, 1, 2], [2, 4, 6]],1127]1128assert s3.dtype == pl.List(pl.List(pl.Int64))11291130df = pl.DataFrame([s1])1131assert_frame_equal(df[2:5], df[range(2, 5)])113211331134def test_strict_cast() -> None:1135with pytest.raises(InvalidOperationError):1136pl.Series("a", [2**16]).cast(dtype=pl.Int16, strict=True)1137with pytest.raises(InvalidOperationError):1138pl.DataFrame({"a": [2**16]}).select([pl.col("a").cast(pl.Int16, strict=True)])113911401141def test_floor_divide() -> None:1142s = pl.Series("a", [1, 2, 3])1143assert_series_equal(s // 2, pl.Series("a", [0, 1, 1]))1144assert_series_equal(1145pl.DataFrame([s]).select(pl.col("a") // 2)["a"], pl.Series("a", [0, 1, 1])1146)114711481149def test_true_divide() -> None:1150s = pl.Series("a", [1, 2])1151assert_series_equal(s / 2, pl.Series("a", [0.5, 1.0]))1152assert_series_equal(1153pl.DataFrame([s]).select(pl.col("a") / 2)["a"], pl.Series("a", [0.5, 1.0])1154)11551156# rtruediv1157assert_series_equal(1158pl.DataFrame([s]).select(2 / pl.col("a"))["literal"],1159pl.Series("literal", [2.0, 1.0]),1160)11611162# https://github.com/pola-rs/polars/issues/13691163vals = [3000000000, 2, 3]1164foo = pl.Series(vals)1165assert_series_equal(foo / 1, pl.Series(vals, dtype=Float64))1166assert_series_equal(1167pl.DataFrame({"a": vals}).select([pl.col("a") / 1])["a"],1168pl.Series("a", vals, dtype=Float64),1169)117011711172def test_bitwise() -> None:1173a = pl.Series("a", [1, 2, 3])1174b = pl.Series("b", [3, 4, 5])1175assert_series_equal(a & b, pl.Series("a", [1, 0, 1]))1176assert_series_equal(a | b, pl.Series("a", [3, 6, 7]))1177assert_series_equal(a ^ b, pl.Series("a", [2, 6, 6]))11781179df = pl.DataFrame([a, b])1180out = df.select(1181(pl.col("a") & pl.col("b")).alias("and"),1182(pl.col("a") | pl.col("b")).alias("or"),1183(pl.col("a") ^ pl.col("b")).alias("xor"),1184)1185assert_series_equal(out["and"], pl.Series("and", [1, 0, 1]))1186assert_series_equal(out["or"], pl.Series("or", [3, 6, 7]))1187assert_series_equal(out["xor"], pl.Series("xor", [2, 6, 6]))11881189# ensure mistaken use of logical 'and'/'or' raises an exception1190with pytest.raises(TypeError, match="ambiguous"):1191a and b # type: ignore[redundant-expr]11921193with pytest.raises(TypeError, match="ambiguous"):1194a or b # type: ignore[redundant-expr]119511961197def test_from_generator_or_iterable() -> None:1198# generator function1199def gen(n: int) -> Iterator[int]:1200yield from range(n)12011202# iterable object1203class Data:1204def __init__(self, n: int) -> None:1205self._n = n12061207def __iter__(self) -> Iterator[int]:1208yield from gen(self._n)12091210expected = pl.Series("s", range(10))1211assert expected.dtype == pl.Int6412121213for generated_series in (1214pl.Series("s", values=gen(10)),1215pl.Series("s", values=Data(10)),1216pl.Series("s", values=(x for x in gen(10))),1217):1218assert_series_equal(expected, generated_series)12191220# test 'iterable_to_pyseries' directly to validate 'chunk_size' behaviour1221ps1 = iterable_to_pyseries("s", gen(10), dtype=pl.UInt8)1222ps2 = iterable_to_pyseries("s", gen(10), dtype=pl.UInt8, chunk_size=3)1223ps3 = iterable_to_pyseries("s", Data(10), dtype=pl.UInt8, chunk_size=6)12241225expected = pl.Series("s", range(10), dtype=pl.UInt8)1226assert expected.dtype == pl.UInt812271228for ps in (ps1, ps2, ps3):1229generated_series = pl.Series("s")1230generated_series._s = ps1231assert_series_equal(expected, generated_series)12321233# empty generator1234assert_series_equal(pl.Series("s", []), pl.Series("s", values=gen(0)))123512361237def test_from_sequences(monkeypatch: Any) -> None:1238# test int, str, bool, flt1239values = [1240[[1], [None, 3]],1241[["foo"], [None, "bar"]],1242[[True], [None, False]],1243[[1.0], [None, 3.0]],1244]12451246for vals in values:1247monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", False)1248a = pl.Series("a", vals)1249monkeypatch.setattr(pl.series.series, "_PYARROW_AVAILABLE", True)1250b = pl.Series("a", vals)1251assert_series_equal(a, b)1252assert a.to_list() == vals125312541255def test_comparisons_int_series_to_float() -> None:1256srs_int = pl.Series([1, 2, 3, 4])12571258assert_series_equal(srs_int - 1.0, pl.Series([0.0, 1.0, 2.0, 3.0]))1259assert_series_equal(srs_int + 1.0, pl.Series([2.0, 3.0, 4.0, 5.0]))1260assert_series_equal(srs_int * 2.0, pl.Series([2.0, 4.0, 6.0, 8.0]))1261assert_series_equal(srs_int / 2.0, pl.Series([0.5, 1.0, 1.5, 2.0]))1262assert_series_equal(srs_int % 2.0, pl.Series([1.0, 0.0, 1.0, 0.0]))1263assert_series_equal(4.0 % srs_int, pl.Series([0.0, 0.0, 1.0, 0.0]))12641265assert_series_equal(srs_int // 2.0, pl.Series([0.0, 1.0, 1.0, 2.0]))1266assert_series_equal(srs_int < 3.0, pl.Series([True, True, False, False]))1267assert_series_equal(srs_int <= 3.0, pl.Series([True, True, True, False]))1268assert_series_equal(srs_int > 3.0, pl.Series([False, False, False, True]))1269assert_series_equal(srs_int >= 3.0, pl.Series([False, False, True, True]))1270assert_series_equal(srs_int == 3.0, pl.Series([False, False, True, False]))1271assert_series_equal(srs_int - True, pl.Series([0, 1, 2, 3]))127212731274def test_comparisons_int_series_to_float_scalar() -> None:1275srs_int = pl.Series([1, 2, 3, 4])12761277assert_series_equal(srs_int < 1.5, pl.Series([True, False, False, False]))1278assert_series_equal(srs_int > 1.5, pl.Series([False, True, True, True]))127912801281def test_comparisons_datetime_series_to_date_scalar() -> None:1282srs_date = pl.Series([date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 3)])1283dt = datetime(2023, 1, 1, 12, 0, 0)12841285assert_series_equal(srs_date < dt, pl.Series([True, False, False]))1286assert_series_equal(srs_date > dt, pl.Series([False, True, True]))128712881289def test_comparisons_float_series_to_int() -> None:1290srs_float = pl.Series([1.0, 2.0, 3.0, 4.0])12911292assert_series_equal(srs_float - 1, pl.Series([0.0, 1.0, 2.0, 3.0]))1293assert_series_equal(srs_float + 1, pl.Series([2.0, 3.0, 4.0, 5.0]))1294assert_series_equal(srs_float * 2, pl.Series([2.0, 4.0, 6.0, 8.0]))1295assert_series_equal(srs_float / 2, pl.Series([0.5, 1.0, 1.5, 2.0]))1296assert_series_equal(srs_float % 2, pl.Series([1.0, 0.0, 1.0, 0.0]))1297assert_series_equal(4 % srs_float, pl.Series([0.0, 0.0, 1.0, 0.0]))12981299assert_series_equal(srs_float // 2, pl.Series([0.0, 1.0, 1.0, 2.0]))1300assert_series_equal(srs_float < 3, pl.Series([True, True, False, False]))1301assert_series_equal(srs_float <= 3, pl.Series([True, True, True, False]))1302assert_series_equal(srs_float > 3, pl.Series([False, False, False, True]))1303assert_series_equal(srs_float >= 3, pl.Series([False, False, True, True]))1304assert_series_equal(srs_float == 3, pl.Series([False, False, True, False]))1305assert_series_equal(srs_float - True, pl.Series([0.0, 1.0, 2.0, 3.0]))130613071308def test_comparisons_bool_series_to_int() -> None:1309srs_bool = pl.Series([True, False])13101311# (native bool comparison should work...)1312for t, f in ((True, False), (False, True)):1313assert list(srs_bool == t) == list(srs_bool != f) == [t, f]13141315# TODO: do we want this to work?1316assert_series_equal(srs_bool / 1, pl.Series([True, False], dtype=Float64))1317match = (1318r"cannot do arithmetic with Series of dtype: Boolean"1319r" and argument of type: 'bool'"1320)1321with pytest.raises(TypeError, match=match):1322srs_bool - 11323with pytest.raises(TypeError, match=match):1324srs_bool + 11325match = (1326r"cannot do arithmetic with Series of dtype: Boolean"1327r" and argument of type: 'bool'"1328)1329with pytest.raises(TypeError, match=match):1330srs_bool % 21331with pytest.raises(TypeError, match=match):1332srs_bool * 113331334from operator import ge, gt, le, lt13351336for op in (ge, gt, le, lt):1337for scalar in (0, 1.0, True, False):1338op_str = op.__name__.replace("e", "t_eq")1339with pytest.raises(1340NotImplementedError,1341match=rf"Series of type Boolean does not have {op_str} operator",1342):1343op(srs_bool, scalar)134413451346@pytest.mark.parametrize(1347("values", "compare_with", "compares_equal"),1348[1349(1350[date(1999, 12, 31), date(2021, 1, 31)],1351date(2021, 1, 31),1352[False, True],1353),1354(1355[datetime(2021, 1, 1, 12, 0, 0), datetime(2021, 1, 2, 12, 0, 0)],1356datetime(2021, 1, 1, 12, 0, 0),1357[True, False],1358),1359(1360[timedelta(days=1), timedelta(days=2)],1361timedelta(days=1),1362[True, False],1363),1364],1365)1366def test_temporal_comparison(1367values: list[Any], compare_with: Any, compares_equal: list[bool]1368) -> None:1369assert_series_equal(1370pl.Series(values) == compare_with,1371pl.Series(compares_equal, dtype=pl.Boolean),1372)137313741375def test_to_dummies() -> None:1376s = pl.Series("a", [1, 2, 3])1377result = s.to_dummies()1378expected = pl.DataFrame(1379{"a_1": [1, 0, 0], "a_2": [0, 1, 0], "a_3": [0, 0, 1]},1380schema={"a_1": pl.UInt8, "a_2": pl.UInt8, "a_3": pl.UInt8},1381)1382assert_frame_equal(result, expected)138313841385def test_to_dummies_drop_first() -> None:1386s = pl.Series("a", [1, 2, 3])1387result = s.to_dummies(drop_first=True)1388expected = pl.DataFrame(1389{"a_2": [0, 1, 0], "a_3": [0, 0, 1]},1390schema={"a_2": pl.UInt8, "a_3": pl.UInt8},1391)1392assert_frame_equal(result, expected)139313941395def test_to_dummies_drop_nulls() -> None:1396s = pl.Series("a", [1, 2, None])1397result = s.to_dummies(drop_nulls=True)1398expected = pl.DataFrame(1399{"a_1": [1, 0, 0], "a_2": [0, 1, 0]},1400schema={"a_1": pl.UInt8, "a_2": pl.UInt8},1401)1402assert_frame_equal(result, expected)140314041405def test_to_dummies_null_clash_19096() -> None:1406with pytest.raises(1407DuplicateError, match="column with name '_null' has more than one occurrence"1408):1409pl.Series([None, "null"]).to_dummies()141014111412def test_chunk_lengths() -> None:1413s = pl.Series("a", [1, 2, 2, 3])1414# this is a Series with one chunk, of length 41415assert s.n_chunks() == 11416assert s.chunk_lengths() == [4]141714181419def test_limit() -> None:1420s = pl.Series("a", [1, 2, 3])1421assert_series_equal(s.limit(2), pl.Series("a", [1, 2]))142214231424def test_filter() -> None:1425s = pl.Series("a", [1, 2, 3])1426mask = pl.Series("", [True, False, True])14271428assert_series_equal(s.filter(mask), pl.Series("a", [1, 3]))1429assert_series_equal(s.filter([True, False, True]), pl.Series("a", [1, 3]))1430assert_series_equal(s.filter(np.array([True, False, True])), pl.Series("a", [1, 3]))14311432with pytest.raises(RuntimeError, match="Expected a boolean mask"):1433s.filter(np.array([1, 0, 1]))143414351436def test_gather_every() -> None:1437s = pl.Series("a", [1, 2, 3, 4])1438assert_series_equal(s.gather_every(2), pl.Series("a", [1, 3]))1439assert_series_equal(s.gather_every(2, offset=1), pl.Series("a", [2, 4]))144014411442def test_arg_sort() -> None:1443s = pl.Series("a", [5, 3, 4, 1, 2])1444expected = pl.Series("a", [3, 4, 1, 2, 0], dtype=UInt32)14451446assert_series_equal(s.arg_sort(), expected)14471448expected_descending = pl.Series("a", [0, 2, 1, 4, 3], dtype=UInt32)1449assert_series_equal(s.arg_sort(descending=True), expected_descending)145014511452@pytest.mark.parametrize(1453("series", "argmin", "argmax"),1454[1455# Numeric1456(pl.Series([5, 3, 4, 1, 2]), 3, 0),1457(pl.Series([None, 5, 1]), 2, 1),1458# Boolean1459(pl.Series([True, False]), 1, 0),1460(pl.Series([True, True]), 0, 0),1461(pl.Series([False, False]), 0, 0),1462(pl.Series([None, True, False, True]), 2, 1),1463(pl.Series([None, True, True]), 1, 1),1464(pl.Series([None, False, False]), 1, 1),1465# String1466(pl.Series(["a", "c", "b"]), 0, 1),1467(pl.Series([None, "a", None, "b"]), 1, 3),1468# Categorical1469(pl.Series(["c", "b", "a"], dtype=pl.Categorical(ordering="lexical")), 2, 0),1470(pl.Series("s", [None, "c", "b", None, "a"], pl.Categorical("lexical")), 4, 1),1471],1472)1473def test_arg_min_arg_max(series: pl.Series, argmin: int, argmax: int) -> None:1474assert series.arg_min() == argmin, (1475f"values: {series.to_list()}, expected {argmin} got {series.arg_min()}"1476)1477assert series.arg_max() == argmax, (1478f"values: {series.to_list()}, expected {argmax} got {series.arg_max()}"1479)148014811482@pytest.mark.parametrize(1483("series"),1484[1485# All nulls1486pl.Series([None, None], dtype=pl.Int32),1487pl.Series([None, None], dtype=pl.Boolean),1488pl.Series([None, None], dtype=pl.String),1489pl.Series([None, None], dtype=pl.Categorical),1490pl.Series([None, None], dtype=pl.Categorical(ordering="lexical")),1491# Empty Series1492pl.Series([], dtype=pl.Int32),1493pl.Series([], dtype=pl.Boolean),1494pl.Series([], dtype=pl.String),1495pl.Series([], dtype=pl.Categorical),1496],1497)1498def test_arg_min_arg_max_all_nulls_or_empty(series: pl.Series) -> None:1499assert series.arg_min() is None1500assert series.arg_max() is None150115021503def test_arg_min_and_arg_max_sorted() -> None:1504# test ascending and descending numerical series1505s = pl.Series([None, 1, 2, 3, 4, 5])1506s.sort(in_place=True) # set ascending sorted flag1507assert s.flags == {"SORTED_ASC": True, "SORTED_DESC": False}1508assert s.arg_min() == 11509assert s.arg_max() == 51510s = pl.Series([None, 5, 4, 3, 2, 1])1511s.sort(descending=True, in_place=True) # set descing sorted flag1512assert s.flags == {"SORTED_ASC": False, "SORTED_DESC": True}1513assert s.arg_min() == 51514assert s.arg_max() == 115151516# test ascending and descending str series1517s = pl.Series([None, "a", "b", "c", "d", "e"])1518s.sort(in_place=True) # set ascending sorted flag1519assert s.flags == {"SORTED_ASC": True, "SORTED_DESC": False}1520assert s.arg_min() == 11521assert s.arg_max() == 51522s = pl.Series([None, "e", "d", "c", "b", "a"])1523s.sort(descending=True, in_place=True) # set descing sorted flag1524assert s.flags == {"SORTED_ASC": False, "SORTED_DESC": True}1525assert s.arg_min() == 51526assert s.arg_max() == 1152715281529def test_is_null_is_not_null() -> None:1530s = pl.Series("a", [1.0, 2.0, 3.0, None])1531assert_series_equal(s.is_null(), pl.Series("a", [False, False, False, True]))1532assert_series_equal(s.is_not_null(), pl.Series("a", [True, True, True, False]))153315341535def test_is_finite_is_infinite() -> None:1536s = pl.Series("a", [1.0, 2.0, np.inf])1537assert_series_equal(s.is_finite(), pl.Series("a", [True, True, False]))1538assert_series_equal(s.is_infinite(), pl.Series("a", [False, False, True]))153915401541@pytest.mark.parametrize("float_type", [pl.Float32, pl.Float64])1542def test_is_nan_is_not_nan(float_type: PolarsDataType) -> None:1543s = pl.Series([1.0, np.nan, None], dtype=float_type)15441545assert_series_equal(s.is_nan(), pl.Series([False, True, None]))1546assert_series_equal(s.is_not_nan(), pl.Series([True, False, None]))1547assert_series_equal(s.fill_nan(2.0), pl.Series([1.0, 2.0, None], dtype=float_type))1548assert_series_equal(s.drop_nans(), pl.Series([1.0, None], dtype=float_type))154915501551def test_float_methods_on_ints() -> None:1552# these float-specific methods work on non-float numeric types1553s = pl.Series([1, None], dtype=pl.Int32)1554assert_series_equal(s.is_finite(), pl.Series([True, None]))1555assert_series_equal(s.is_infinite(), pl.Series([False, None]))1556assert_series_equal(s.is_nan(), pl.Series([False, None]))1557assert_series_equal(s.is_not_nan(), pl.Series([True, None]))155815591560def test_dot() -> None:1561s1 = pl.Series("a", [1, 2, 3])1562s2 = pl.Series("b", [4.0, 5.0, 6.0])15631564assert np.array([1, 2, 3]) @ np.array([4, 5, 6]) == 3215651566for dot_result in (1567s1.dot(s2),1568s1 @ s2,1569[1, 2, 3] @ s2,1570s1 @ np.array([4, 5, 6]),1571):1572assert dot_result == 3215731574with pytest.raises(ShapeError, match="length mismatch"):1575s1 @ [4, 5, 6, 7, 8]157615771578@pytest.mark.parametrize(1579("dtype"),1580[pl.Int8, pl.Int16, pl.Int32, pl.Float32, pl.Float64],1581)1582def test_peak_max_peak_min(dtype: pl.DataType) -> None:1583s = pl.Series("a", [4, 1, 3, 2, 5], dtype=dtype)15841585result = s.peak_min()1586expected = pl.Series("a", [False, True, False, True, False])1587assert_series_equal(result, expected)15881589result = s.peak_max()1590expected = pl.Series("a", [True, False, True, False, True])1591assert_series_equal(result, expected)159215931594def test_peak_max_peak_min_bool() -> None:1595s = pl.Series("a", [False, True, False, True, True, False], dtype=pl.Boolean)1596result = s.peak_min()1597expected = pl.Series("a", [False, False, True, False, False, False])1598assert_series_equal(result, expected)15991600result = s.peak_max()1601expected = pl.Series("a", [False, True, False, False, False, False])1602assert_series_equal(result, expected)160316041605def test_shrink_to_fit() -> None:1606s = pl.Series("a", [4, 1, 3, 2, 5])1607sf = s.shrink_to_fit(in_place=True)1608assert sf is s16091610s = pl.Series("a", [4, 1, 3, 2, 5])1611sf = s.shrink_to_fit(in_place=False)1612assert s is not sf161316141615@pytest.mark.parametrize("unit", ["ns", "us", "ms"])1616def test_cast_datetime_to_time(unit: TimeUnit) -> None:1617a = pl.Series(1618"a",1619[1620datetime(2022, 9, 7, 0, 0),1621datetime(2022, 9, 6, 12, 0),1622datetime(2022, 9, 7, 23, 59, 59),1623datetime(2022, 9, 7, 23, 59, 59, 201),1624],1625dtype=Datetime(unit),1626)1627if unit == "ms":1628# NOTE: microseconds are lost for `unit=ms`1629expected_values = [time(0, 0), time(12, 0), time(23, 59, 59), time(23, 59, 59)]1630else:1631expected_values = [1632time(0, 0),1633time(12, 0),1634time(23, 59, 59),1635time(23, 59, 59, 201),1636]1637expected = pl.Series("a", expected_values)1638assert_series_equal(a.cast(Time), expected)163916401641def test_init_categorical() -> None:1642for values in [[None], ["foo", "bar"], [None, "foo", "bar"]]:1643expected = pl.Series("a", values, dtype=pl.String).cast(pl.Categorical)1644a = pl.Series("a", values, dtype=pl.Categorical)1645assert_series_equal(a, expected)164616471648def test_iter_nested_list() -> None:1649elems = list(pl.Series("s", [[1, 2], [3, 4]]))1650assert_series_equal(elems[0], pl.Series([1, 2]))1651assert_series_equal(elems[1], pl.Series([3, 4]))16521653rev_elems = list(reversed(pl.Series("s", [[1, 2], [3, 4]])))1654assert_series_equal(rev_elems[0], pl.Series([3, 4]))1655assert_series_equal(rev_elems[1], pl.Series([1, 2]))165616571658def test_iter_nested_struct() -> None:1659# note: this feels inconsistent with the above test for nested list, but1660# let's ensure the behaviour is codified before potentially modifying...1661elems = list(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}]))1662assert elems[0] == {"a": 1, "b": 2}1663assert elems[1] == {"a": 3, "b": 4}16641665rev_elems = list(reversed(pl.Series("s", [{"a": 1, "b": 2}, {"a": 3, "b": 4}])))1666assert rev_elems[0] == {"a": 3, "b": 4}1667assert rev_elems[1] == {"a": 1, "b": 2}166816691670@pytest.mark.parametrize(1671"dtype",1672[1673pl.UInt8,1674pl.Float32,1675pl.Int32,1676pl.Boolean,1677pl.List(pl.String),1678pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]),1679],1680)1681def test_nested_list_types_preserved(dtype: pl.DataType) -> None:1682srs = pl.Series([pl.Series([], dtype=dtype) for _ in range(5)])1683for srs_nested in srs:1684assert srs_nested.dtype == dtype168516861687def test_to_physical() -> None:1688# casting an int result in an int1689s = pl.Series("a", [1, 2, 3])1690assert_series_equal(s.to_physical(), s)16911692# casting a date results in an Int321693s = pl.Series("a", [date(2020, 1, 1)] * 3)1694expected = pl.Series("a", [18262] * 3, dtype=Int32)1695assert_series_equal(s.to_physical(), expected)16961697# casting a categorical results in a UInt321698s = pl.Series(["cat1"]).cast(pl.Categorical)1699assert s.to_physical().dtype == pl.UInt3217001701# casting a small enum results in a UInt81702s = pl.Series(["cat1"]).cast(pl.Enum(["cat1"]))1703assert s.to_physical().dtype == pl.UInt817041705# casting a List(Categorical) results in a List(UInt32)1706s = pl.Series([["cat1"]]).cast(pl.List(pl.Categorical))1707assert s.to_physical().dtype == pl.List(pl.UInt32)17081709# casting a List(Enum) with a small enum results in a List(UInt8)1710s = pl.Series(["cat1"]).cast(pl.List(pl.Enum(["cat1"])))1711assert s.to_physical().dtype == pl.List(pl.UInt8)171217131714def test_to_physical_rechunked_21285() -> None:1715# A series with multiple chunks, dtype is array or list of structs with a1716# null field (causes rechunking) and a field with a different physical and1717# logical repr (causes the full body of `to_physical_repr` to run).1718arr_dtype = pl.Array(pl.Struct({"f0": pl.Time, "f1": pl.Null}), shape=(1,))1719s = pl.Series("a", [None], arr_dtype) # content doesn't matter1720s = s.append(s)1721expected_arr_dtype = pl.Array(pl.Struct({"f0": Int64, "f1": pl.Null}), shape=(1,))1722expected = pl.Series("a", [None, None], expected_arr_dtype)1723assert_series_equal(s.to_physical(), expected)17241725list_dtype = pl.List(pl.Struct({"f0": pl.Time, "f1": pl.Null}))1726s = pl.Series("a", [None], list_dtype) # content doesn't matter1727s = s.append(s)1728expected_list_dtype = pl.List(pl.Struct({"f0": Int64, "f1": pl.Null}))1729expected = pl.Series("a", [None, None], expected_list_dtype)1730assert_series_equal(s.to_physical(), expected)173117321733def test_is_between_datetime() -> None:1734s = pl.Series("a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)])1735start = datetime(2020, 1, 1, 12, 0, 0)1736end = datetime(2020, 1, 1, 23, 0, 0)1737expected = pl.Series("a", [False, True])17381739# only on the expression api1740result = s.to_frame().with_columns(pl.col("*").is_between(start, end)).to_series()1741assert_series_equal(result, expected)174217431744@pytest.mark.parametrize(1745"f",1746[1747"sin",1748"cos",1749"tan",1750"arcsin",1751"arccos",1752"arctan",1753"sinh",1754"cosh",1755"tanh",1756"arcsinh",1757"arccosh",1758"arctanh",1759],1760)1761@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")1762def test_trigonometric(f: str) -> None:1763s = pl.Series("a", [0.0, math.pi, None, math.nan])1764expected = (1765pl.Series("a", getattr(np, f)(s.to_numpy()))1766.to_frame()1767.with_columns(pl.when(s.is_null()).then(None).otherwise(pl.col("a")).alias("a"))1768.to_series()1769)1770result = getattr(s, f)()1771assert_series_equal(result, expected)177217731774@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")1775def test_trigonometric_cot() -> None:1776# cotangent is not available in numpy...1777s = pl.Series("a", [0.0, math.pi, None, math.nan])1778expected = pl.Series("a", [math.inf, -8.1656e15, None, math.nan])1779assert_series_equal(s.cot(), expected)178017811782def test_trigonometric_invalid_input() -> None:1783# String1784s = pl.Series("a", ["1", "2", "3"])1785with pytest.raises(InvalidOperationError):1786s.sin()17871788# Date1789s = pl.Series("a", [date(1990, 2, 28), date(2022, 7, 26)])1790with pytest.raises(InvalidOperationError):1791s.cosh()179217931794@pytest.mark.parametrize("dtype", INTEGER_DTYPES)1795def test_product_ints(dtype: PolarsDataType) -> None:1796a = pl.Series("a", [1, 2, 3], dtype=dtype)1797out = a.product()1798assert out == 61799a = pl.Series("a", [1, 2, None], dtype=dtype)1800out = a.product()1801assert out == 21802a = pl.Series("a", [None, 2, 3], dtype=dtype)1803out = a.product()1804assert out == 6180518061807@pytest.mark.parametrize("dtype", FLOAT_DTYPES)1808def test_product_floats(dtype: PolarsDataType) -> None:1809a = pl.Series("a", [], dtype=dtype)1810out = a.product()1811assert out == 11812a = pl.Series("a", [None, None], dtype=dtype)1813out = a.product()1814assert out == 11815a = pl.Series("a", [3.0, None, float("nan")], dtype=dtype)1816out = a.product()1817assert math.isnan(out)181818191820def test_ceil() -> None:1821s = pl.Series([1.8, 1.2, 3.0])1822expected = pl.Series([2.0, 2.0, 3.0])1823assert_series_equal(s.ceil(), expected)182418251826def test_duration_arithmetic() -> None:1827# apply some basic duration math to series1828s = pl.Series([datetime(2022, 1, 1, 10, 20, 30), datetime(2022, 1, 2, 20, 40, 50)])1829d1 = pl.duration(days=5, microseconds=123456)1830d2 = timedelta(days=5, microseconds=123456)18311832expected_values = [1833datetime(2022, 1, 6, 10, 20, 30, 123456),1834datetime(2022, 1, 7, 20, 40, 50, 123456),1835]1836for d in (d1, d2):1837df1 = pl.select((s + d).alias("d_offset"))1838df2 = pl.select((d + s).alias("d_offset"))1839assert df1["d_offset"].to_list() == expected_values1840assert_series_equal(df1["d_offset"], df2["d_offset"])184118421843def test_mean_overflow() -> None:1844arr = np.array([255] * (1 << 17), dtype="int16")1845assert arr.mean() == 255.0184618471848def test_sign() -> None:1849# Integers1850a = pl.Series("a", [-9, -0, 0, 4, None])1851expected = pl.Series("a", [-1, 0, 0, 1, None])1852assert_series_equal(a.sign(), expected)18531854# Floats1855a = pl.Series("a", [-9.0, -0.0, 0.0, 4.0, float("nan"), None])1856expected = pl.Series("a", [-1.0, 0.0, 0.0, 1.0, float("nan"), None])1857assert_series_equal(a.sign(), expected)18581859# Invalid input1860a = pl.Series("a", [date(1950, 2, 1), date(1970, 1, 1), date(2022, 12, 12), None])1861with pytest.raises(InvalidOperationError):1862a.sign()186318641865def test_exp() -> None:1866s = pl.Series("a", [0.1, 0.01, None])1867expected = pl.Series("a", [1.1051709180756477, 1.010050167084168, None])1868assert_series_equal(s.exp(), expected)1869# test if we can run on empty series as well.1870assert s[:0].exp().to_list() == []187118721873def test_cumulative_eval() -> None:1874s = pl.Series("values", [1, 2, 3, 4, 5])18751876# evaluate expressions individually1877expr1 = pl.element().first()1878expr2 = pl.element().last() ** 218791880expected1 = pl.Series("values", [1, 1, 1, 1, 1])1881expected2 = pl.Series("values", [1, 4, 9, 16, 25])1882assert_series_equal(s.cumulative_eval(expr1), expected1)1883assert_series_equal(s.cumulative_eval(expr2), expected2)18841885# evaluate combined expressions and validate1886expr3 = expr1 - expr21887expected3 = pl.Series("values", [0, -3, -8, -15, -24])1888assert_series_equal(s.cumulative_eval(expr3), expected3)188918901891def test_clip() -> None:1892s = pl.Series("foo", [-50, 5, None, 50])1893assert s.clip(1, 10).to_list() == [1, 5, None, 10]189418951896def test_repr() -> None:1897s = pl.Series("ints", [1001, 2002, 3003])1898s_repr = repr(s)18991900assert "shape: (3,)" in s_repr1901assert "Series: 'ints' [i64]" in s_repr1902for n in s.to_list():1903assert str(n) in s_repr19041905class XSeries(pl.Series):1906"""Custom Series class."""19071908# check custom class name reflected in repr output1909x = XSeries("ints", [1001, 2002, 3003])1910x_repr = repr(x)19111912assert "shape: (3,)" in x_repr1913assert "XSeries: 'ints' [i64]" in x_repr1914assert "1001" in x_repr1915for n in x.to_list():1916assert str(n) in x_repr191719181919def test_repr_html(df: pl.DataFrame) -> None:1920# check it does not panic/error, and appears to contain a table1921html = pl.Series("misc", [123, 456, 789])._repr_html_()1922assert "<table" in html192319241925@pytest.mark.parametrize(1926("value", "time_unit", "exp", "exp_type"),1927[1928(13285, "d", date(2006, 5, 17), pl.Date),1929(1147880044, "s", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime),1930(1147880044 * 1_000, "ms", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime("ms")),1931(19321147880044 * 1_000_000,1933"us",1934datetime(2006, 5, 17, 15, 34, 4),1935pl.Datetime("us"),1936),1937(19381147880044 * 1_000_000_000,1939"ns",1940datetime(2006, 5, 17, 15, 34, 4),1941pl.Datetime("ns"),1942),1943],1944)1945def test_from_epoch_expr(1946value: int,1947time_unit: EpochTimeUnit,1948exp: date | datetime,1949exp_type: PolarsDataType,1950) -> None:1951s = pl.Series("timestamp", [value, None])1952result = pl.from_epoch(s, time_unit=time_unit)19531954expected = pl.Series("timestamp", [exp, None]).cast(exp_type)1955assert_series_equal(result, expected)195619571958def test_get_chunks() -> None:1959a = pl.Series("a", [1, 2])1960b = pl.Series("a", [3, 4])1961chunks = pl.concat([a, b], rechunk=False).get_chunks()1962assert_series_equal(chunks[0], a)1963assert_series_equal(chunks[1], b)196419651966def test_null_comparisons() -> None:1967s = pl.Series("s", [None, "str", "a"])1968assert (s.shift() == s).null_count() == 21969assert (s.shift() != s).null_count() == 2197019711972def test_min_max_agg_on_str() -> None:1973strings = ["b", "a", "x"]1974s = pl.Series(strings)1975assert (s.min(), s.max()) == ("a", "x")197619771978def test_min_max_full_nan_15058() -> None:1979s = pl.Series([float("nan")] * 2)1980assert all(x != x for x in [s.min(), s.max()])198119821983def test_is_between() -> None:1984s = pl.Series("num", [1, 2, None, 4, 5])1985assert s.is_between(2, 4).to_list() == [False, True, None, True, False]19861987s = pl.Series("num", [1, 2, None, 4, 5])1988assert s.is_between(2, 4, closed="left").to_list() == [1989False,1990True,1991None,1992False,1993False,1994]19951996s = pl.Series("num", [1, 2, None, 4, 5])1997assert s.is_between(2, 4, closed="right").to_list() == [1998False,1999False,2000None,2001True,2002False,2003]20042005s = pl.Series("num", [1, 2, None, 4, 5])2006assert s.is_between(pl.lit(2) / 2, pl.lit(4) * 2, closed="both").to_list() == [2007True,2008True,2009None,2010True,2011True,2012]20132014s = pl.Series("s", ["a", "b", "c", "d", "e"])2015assert s.is_between("b", "d").to_list() == [2016False,2017True,2018True,2019True,2020False,2021]202220232024@pytest.mark.parametrize(2025("dtype", "lower", "upper"),2026[2027(pl.Int8, -128, 127),2028(pl.UInt8, 0, 255),2029(pl.Int16, -32768, 32767),2030(pl.UInt16, 0, 65535),2031(pl.Int32, -2147483648, 2147483647),2032(pl.UInt32, 0, 4294967295),2033(pl.Int64, -9223372036854775808, 9223372036854775807),2034(pl.UInt64, 0, 18446744073709551615),2035(pl.Float32, float("-inf"), float("inf")),2036(pl.Float64, float("-inf"), float("inf")),2037],2038)2039def test_upper_lower_bounds(2040dtype: PolarsDataType, upper: int | float, lower: int | float2041) -> None:2042s = pl.Series("s", dtype=dtype)2043assert s.lower_bound().item() == lower2044assert s.upper_bound().item() == upper204520462047def test_numpy_series_arithmetic() -> None:2048sx = pl.Series(values=[1, 2])2049y = np.array([3.0, 4.0])20502051result_add1 = y + sx2052result_add2 = sx + y2053expected_add = pl.Series([4.0, 6.0], dtype=pl.Float64)2054assert_series_equal(result_add1, expected_add) # type: ignore[arg-type]2055assert_series_equal(result_add2, expected_add)20562057result_sub1 = cast("pl.Series", y - sx) # py37 is different vs py311 on this one2058expected = pl.Series([2.0, 2.0], dtype=pl.Float64)2059assert_series_equal(result_sub1, expected)2060result_sub2 = sx - y2061expected = pl.Series([-2.0, -2.0], dtype=pl.Float64)2062assert_series_equal(result_sub2, expected)20632064result_mul1 = y * sx2065result_mul2 = sx * y2066expected = pl.Series([3.0, 8.0], dtype=pl.Float64)2067assert_series_equal(result_mul1, expected) # type: ignore[arg-type]2068assert_series_equal(result_mul2, expected)20692070result_div1 = y / sx2071expected = pl.Series([3.0, 2.0], dtype=pl.Float64)2072assert_series_equal(result_div1, expected) # type: ignore[arg-type]2073result_div2 = sx / y2074expected = pl.Series([1 / 3, 0.5], dtype=pl.Float64)2075assert_series_equal(result_div2, expected)20762077result_pow1 = y**sx2078expected = pl.Series([3.0, 16.0], dtype=pl.Float64)2079assert_series_equal(result_pow1, expected) # type: ignore[arg-type]2080result_pow2 = sx**y2081expected = pl.Series([1.0, 16.0], dtype=pl.Float64)2082assert_series_equal(result_pow2, expected) # type: ignore[arg-type]208320842085def test_from_epoch_seq_input() -> None:2086seq_input = [1147880044]2087expected = pl.Series([datetime(2006, 5, 17, 15, 34, 4)])2088result = pl.from_epoch(seq_input)2089assert_series_equal(result, expected)209020912092def test_symmetry_for_max_in_names() -> None:2093# int2094a = pl.Series("a", [1])2095assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]2096# float2097a = pl.Series("a", [1.0])2098assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]2099# duration2100a = pl.Series("a", [1], dtype=pl.Duration("ns"))2101assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]2102# datetime2103a = pl.Series("a", [1], dtype=pl.Datetime("ns"))2104assert (a - a.max()).name == (a.max() - a).name == a.name # type: ignore[union-attr]21052106# TODO: time arithmetic support?2107# a = pl.Series("a", [1], dtype=pl.Time)2108# assert (a - a.max()).name == (a.max() - a).name == a.name210921102111def test_series_getitem_out_of_bounds_positive() -> None:2112s = pl.Series([1, 2])2113with pytest.raises(2114IndexError, match="index 10 is out of bounds for sequence of length 2"2115):2116s[10]211721182119def test_series_getitem_out_of_bounds_negative() -> None:2120s = pl.Series([1, 2])2121with pytest.raises(2122IndexError, match="index -10 is out of bounds for sequence of length 2"2123):2124s[-10]212521262127def test_series_cmp_fast_paths() -> None:2128assert (2129pl.Series([None], dtype=pl.Int32) != pl.Series([1, 2], dtype=pl.Int32)2130).to_list() == [None, None]2131assert (2132pl.Series([None], dtype=pl.Int32) == pl.Series([1, 2], dtype=pl.Int32)2133).to_list() == [None, None]21342135assert (2136pl.Series([None], dtype=pl.String) != pl.Series(["a", "b"], dtype=pl.String)2137).to_list() == [None, None]2138assert (2139pl.Series([None], dtype=pl.String) == pl.Series(["a", "b"], dtype=pl.String)2140).to_list() == [None, None]21412142assert (2143pl.Series([None], dtype=pl.Boolean)2144!= pl.Series([True, False], dtype=pl.Boolean)2145).to_list() == [None, None]2146assert (2147pl.Series([None], dtype=pl.Boolean)2148== pl.Series([False, False], dtype=pl.Boolean)2149).to_list() == [None, None]215021512152def test_comp_series_with_str_13123() -> None:2153s = pl.Series(["1", "2", None])2154assert_series_equal(s != "1", pl.Series([False, True, None]))2155assert_series_equal(s == "1", pl.Series([True, False, None]))2156assert_series_equal(s.eq_missing("1"), pl.Series([True, False, False]))2157assert_series_equal(s.ne_missing("1"), pl.Series([False, True, True]))215821592160@pytest.mark.parametrize(2161("data", "single", "multiple", "single_expected", "multiple_expected"),2162[2163([1, 2, 3], 1, [2, 4], 0, [1, 3]),2164(["a", "b", "c"], "d", ["a", "d"], 3, [0, 3]),2165([b"a", b"b", b"c"], b"d", [b"a", b"d"], 3, [0, 3]),2166(2167[date(2022, 1, 2), date(2023, 4, 1)],2168date(2022, 1, 1),2169[date(1999, 10, 1), date(2024, 1, 1)],21700,2171[0, 2],2172),2173([1, 2, 3], 1, np.array([2, 4]), 0, [1, 3]), # test np array.2174],2175)2176def test_search_sorted(2177data: list[Any],2178single: Any,2179multiple: list[Any],2180single_expected: Any,2181multiple_expected: list[Any],2182) -> None:2183s = pl.Series(data)2184single_s = s.search_sorted(single)2185assert single_s == single_expected21862187multiple_s = s.search_sorted(multiple)2188assert_series_equal(multiple_s, pl.Series(multiple_expected, dtype=pl.UInt32))218921902191def test_series_from_pandas_with_dtype() -> None:2192expected = pl.Series("foo", [1, 2, 3], dtype=pl.Int8)2193s = pl.Series("foo", pd.Series([1, 2, 3]), pl.Int8)2194assert_series_equal(s, expected)2195s = pl.Series("foo", pd.Series([1, 2, 3], dtype="Int16"), pl.Int8)2196assert_series_equal(s, expected)21972198with pytest.raises(InvalidOperationError, match="conversion from"):2199pl.Series("foo", pd.Series([-1, 2, 3]), pl.UInt8)2200s = pl.Series("foo", pd.Series([-1, 2, 3]), pl.UInt8, strict=False)2201assert s.to_list() == [None, 2, 3]2202assert s.dtype == pl.UInt822032204with pytest.raises(InvalidOperationError, match="conversion from"):2205pl.Series("foo", pd.Series([-1, 2, 3], dtype="Int8"), pl.UInt8)2206s = pl.Series("foo", pd.Series([-1, 2, 3], dtype="Int8"), pl.UInt8, strict=False)2207assert s.to_list() == [None, 2, 3]2208assert s.dtype == pl.UInt8220922102211def test_series_from_pyarrow_with_dtype() -> None:2212s = pl.Series("foo", pa.array([-1, 2, 3]), pl.Int8)2213assert_series_equal(s, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))22142215with pytest.raises(InvalidOperationError, match="conversion from"):2216pl.Series("foo", pa.array([-1, 2, 3]), pl.UInt8)22172218s = pl.Series("foo", pa.array([-1, 2, 3]), dtype=pl.UInt8, strict=False)2219assert s.to_list() == [None, 2, 3]2220assert s.dtype == pl.UInt8222122222223def test_series_from_numpy_with_dtype() -> None:2224s = pl.Series("foo", np.array([-1, 2, 3]), pl.Int8)2225assert_series_equal(s, pl.Series("foo", [-1, 2, 3], dtype=pl.Int8))22262227with pytest.raises(InvalidOperationError, match="conversion from"):2228pl.Series("foo", np.array([-1, 2, 3]), pl.UInt8)22292230s = pl.Series("foo", np.array([-1, 2, 3]), dtype=pl.UInt8, strict=False)2231assert s.to_list() == [None, 2, 3]2232assert s.dtype == pl.UInt8223322342235def test_raise_invalid_is_between() -> None:2236with pytest.raises(pl.exceptions.InvalidOperationError):2237pl.select(pl.lit(2).is_between(pl.lit("11"), pl.lit("33")))223822392240def test_construction_large_nested_u64_17231() -> None:2241import polars as pl22422243values = [{"f0": [9223372036854775808]}]2244dtype = pl.Struct({"f0": pl.List(pl.UInt64)})2245assert pl.Series(values, dtype=dtype).to_list() == values224622472248def test_repeat_by() -> None:2249calculated = pl.select(a=pl.Series("a", [1, 2]).repeat_by(2))2250expected = pl.select(a=pl.Series("a", [[1, 1], [2, 2]]))2251assert calculated.equals(expected)225222532254def test_is_close() -> None:2255a = pl.Series(2256"a",2257[22581.0,22591.0,2260float("-inf"),2261float("inf"),2262float("inf"),2263float("inf"),2264float("nan"),2265],2266)2267b = pl.Series(2268"b", [1.3, 1.7, float("-inf"), float("inf"), float("-inf"), 1.0, float("nan")]2269)2270assert a.is_close(b, abs_tol=0.5).to_list() == [2271True,2272False,2273True,2274True,2275False,2276False,2277False,2278]227922802281def test_is_close_literal() -> None:2282a = pl.Series("a", [1.1, 1.2, 1.3, 1.4, float("inf"), float("nan")])2283assert a.is_close(1.2).to_list() == [False, True, False, False, False, False]228422852286def test_is_close_nans_equal() -> None:2287a = pl.Series("a", [1.0, float("nan")])2288b = pl.Series("b", [2.0, float("nan")])2289assert a.is_close(b, nans_equal=True).to_list() == [False, True]229022912292def test_is_close_invalid_abs_tol() -> None:2293with pytest.raises(pl.exceptions.ComputeError):2294pl.select(pl.lit(1.0).is_close(1, abs_tol=-1.0))229522962297def test_is_close_invalid_rel_tol() -> None:2298with pytest.raises(pl.exceptions.ComputeError):2299pl.select(pl.lit(1.0).is_close(1, rel_tol=-1.0))230023012302def test_comparisons_structs_raise() -> None:2303s = pl.Series([{"x": 1}, {"x": 2}, {"x": 3}])2304rhss = ["", " ", 5, {"x": 1}]2305for rhs in rhss:2306with pytest.raises(2307NotImplementedError,2308match=r"Series of type Struct\(\{'x': Int64\}\) does not have eq operator",2309):2310s == rhs # noqa: B015231123122313