# Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_replace.py
# 6940 views
from __future__ import annotations

from datetime import date, datetime
from typing import TYPE_CHECKING

import pytest

import polars as pl
from polars.exceptions import ComputeError
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
    from polars._typing import TimeUnit


def test_replace_expr_datetime() -> None:
    """Replace every datetime component from per-row columns.

    A null replacement value keeps that component of the original datetime,
    and a null datetime stays null.
    """
    df = pl.DataFrame(
        {
            "dates": [
                datetime(2088, 8, 8, 8, 8, 8, 8),
                datetime(2088, 8, 8, 8, 8, 8, 8),
                datetime(2088, 8, 8, 8, 8, 8, 8),
                datetime(2088, 8, 8, 8, 8, 8, 8),
                datetime(2088, 8, 8, 8, 8, 8, 8),
                datetime(2088, 8, 8, 8, 8, 8, 8),
                datetime(2088, 8, 8, 8, 8, 8, 8),
                None,
            ],
            # Each column holds exactly one null, on a different row, so every
            # component is exercised as "left unchanged" once.
            "year": [None, 2, 3, 4, 5, 6, 7, 8],
            "month": [1, None, 3, 4, 5, 6, 7, 8],
            "day": [1, 2, None, 4, 5, 6, 7, 8],
            "hour": [1, 2, 3, None, 5, 6, 7, 8],
            "minute": [1, 2, 3, 4, None, 6, 7, 8],
            "second": [1, 2, 3, 4, 5, None, 7, 8],
            "microsecond": [1, 2, 3, 4, 5, 6, None, 8],
        }
    )

    result = df.select(
        pl.col("dates").dt.replace(
            year="year",
            month="month",
            day="day",
            hour="hour",
            minute="minute",
            second="second",
            microsecond="microsecond",
        )
    )

    expected = pl.DataFrame(
        {
            "dates": [
                datetime(2088, 1, 1, 1, 1, 1, 1),
                datetime(2, 8, 2, 2, 2, 2, 2),
                datetime(3, 3, 8, 3, 3, 3, 3),
                datetime(4, 4, 4, 8, 4, 4, 4),
                datetime(5, 5, 5, 5, 8, 5, 5),
                datetime(6, 6, 6, 6, 6, 8, 6),
                datetime(7, 7, 7, 7, 7, 7, 8),
                None,
            ]
        }
    )

    assert_frame_equal(result, expected)


def test_replace_expr_date() -> None:
    """Replace year/month/day of a date column from per-row columns.

    A null replacement value keeps that component of the original date, and a
    null date stays null.
    """
    df = pl.DataFrame(
        {
            "dates": [date(2088, 8, 8), date(2088, 8, 8), date(2088, 8, 8), None],
            "year": [None, 2, 3, 4],
            "month": [1, None, 3, 4],
            "day": [1, 2, None, 4],
        }
    )

    result = df.select(
        pl.col("dates").dt.replace(year="year", month="month", day="day")
    )

    expected = pl.DataFrame(
        {"dates": [date(2088, 1, 1), date(2, 8, 2), date(3, 3, 8), None]}
    )

    assert_frame_equal(result, expected)


def test_replace_int_datetime() -> None:
    """Replace a single datetime component at a time with a literal integer.

    Calling ``replace()`` with no arguments must return the column unchanged,
    and null inputs stay null in every output column.
    """
    df = pl.DataFrame(
        {
            "a": [
                datetime(1, 1, 1, 1, 1, 1, 1),
                datetime(2, 2, 2, 2, 2, 2, 2),
                datetime(3, 3, 3, 3, 3, 3, 3),
                None,
            ]
        }
    )
    result = df.select(
        pl.col("a").dt.replace().alias("no_change"),
        pl.col("a").dt.replace(year=9).alias("year"),
        pl.col("a").dt.replace(month=9).alias("month"),
        pl.col("a").dt.replace(day=9).alias("day"),
        pl.col("a").dt.replace(hour=9).alias("hour"),
        pl.col("a").dt.replace(minute=9).alias("minute"),
        pl.col("a").dt.replace(second=9).alias("second"),
        pl.col("a").dt.replace(microsecond=9).alias("microsecond"),
    )
    expected = pl.DataFrame(
        {
            "no_change": [
                datetime(1, 1, 1, 1, 1, 1, 1),
                datetime(2, 2, 2, 2, 2, 2, 2),
                datetime(3, 3, 3, 3, 3, 3, 3),
                None,
            ],
            "year": [
                datetime(9, 1, 1, 1, 1, 1, 1),
                datetime(9, 2, 2, 2, 2, 2, 2),
                datetime(9, 3, 3, 3, 3, 3, 3),
                None,
            ],
            "month": [
                datetime(1, 9, 1, 1, 1, 1, 1),
                datetime(2, 9, 2, 2, 2, 2, 2),
                datetime(3, 9, 3, 3, 3, 3, 3),
                None,
            ],
            "day": [
                datetime(1, 1, 9, 1, 1, 1, 1),
                datetime(2, 2, 9, 2, 2, 2, 2),
                datetime(3, 3, 9, 3, 3, 3, 3),
                None,
            ],
            "hour": [
                datetime(1, 1, 1, 9, 1, 1, 1),
                datetime(2, 2, 2, 9, 2, 2, 2),
                datetime(3, 3, 3, 9, 3, 3, 3),
                None,
            ],
            "minute": [
                datetime(1, 1, 1, 1, 9, 1, 1),
                datetime(2, 2, 2, 2, 9, 2, 2),
                datetime(3, 3, 3, 3, 9, 3, 3),
                None,
            ],
            "second": [
                datetime(1, 1, 1, 1, 1, 9, 1),
                datetime(2, 2, 2, 2, 2, 9, 2),
                datetime(3, 3, 3, 3, 3, 9, 3),
                None,
            ],
            "microsecond": [
                datetime(1, 1, 1, 1, 1, 1, 9),
                datetime(2, 2, 2, 2, 2, 2, 9),
                datetime(3, 3, 3, 3, 3, 3, 9),
                None,
            ],
        }
    )
    assert_frame_equal(result, expected)


def test_replace_int_date() -> None:
    """Replace a single date component at a time with a literal integer.

    Calling ``replace()`` with no arguments must return the column unchanged,
    and null inputs stay null in every output column.
    """
    df = pl.DataFrame(
        {
            "a": [
                date(1, 1, 1),
                date(2, 2, 2),
                date(3, 3, 3),
                None,
            ]
        }
    )
    result = df.select(
        pl.col("a").dt.replace().alias("no_change"),
        pl.col("a").dt.replace(year=9).alias("year"),
        pl.col("a").dt.replace(month=9).alias("month"),
        pl.col("a").dt.replace(day=9).alias("day"),
    )
    expected = pl.DataFrame(
        {
            "no_change": [
                date(1, 1, 1),
                date(2, 2, 2),
                date(3, 3, 3),
                None,
            ],
            "year": [
                date(9, 1, 1),
                date(9, 2, 2),
                date(9, 3, 3),
                None,
            ],
            "month": [
                date(1, 9, 1),
                date(2, 9, 2),
                date(3, 9, 3),
                None,
            ],
            "day": [
                date(1, 1, 9),
                date(2, 2, 9),
                date(3, 3, 9),
                None,
            ],
        }
    )
    assert_frame_equal(result, expected)


def test_replace_ambiguous() -> None:
    """Check every ``ambiguous`` strategy when the replaced hour is ambiguous.

    For "earliest", "latest" and "null" the result must equal ``pl.datetime``
    built from the same components with the same strategy; "raise" must raise
    a ``ComputeError``.
    """
    # Value to be replaced by an ambiguous hour.
    value = pl.select(
        pl.datetime(2020, 10, 25, 5, time_zone="Europe/London")
    ).to_series()

    # Year, month, day, hour of the target; named `components` so the
    # builtin `input` is not shadowed.
    components = [2020, 10, 25, 1]
    tz = "Europe/London"

    # earliest
    expected = pl.select(
        pl.datetime(*components, time_zone=tz, ambiguous="earliest")
    ).to_series()
    result = value.dt.replace(hour=1, ambiguous="earliest")
    assert_series_equal(result, expected)

    # latest
    expected = pl.select(
        pl.datetime(*components, time_zone=tz, ambiguous="latest")
    ).to_series()
    result = value.dt.replace(hour=1, ambiguous="latest")
    assert_series_equal(result, expected)

    # null
    expected = pl.select(
        pl.datetime(*components, time_zone=tz, ambiguous="null")
    ).to_series()
    result = value.dt.replace(hour=1, ambiguous="null")
    assert_series_equal(result, expected)

    # raise
    with pytest.raises(
        ComputeError,
        match=(
            "datetime '2020-10-25 01:00:00' is ambiguous in time zone 'Europe/London'. "
            "Please use `ambiguous` to tell how it should be localized."
        ),
    ):
        value.dt.replace(hour=1, ambiguous="raise")


def test_replace_datetime_preserve_ns() -> None:
    """Check replacement results on a nanosecond-precision datetime column.

    Replacing the year keeps the full nanosecond fraction. Replacing the
    microsecond yields exactly that microsecond value, while a null
    replacement leaves the original value (fraction included) untouched.
    """
    df = pl.DataFrame(
        {
            "a": pl.Series(["2020-01-01T00:00:00.123456789"] * 2).cast(
                pl.Datetime("ns")
            ),
            "year": [2021, None],
            "microsecond": [50, None],
        }
    )

    result = df.select(
        year=pl.col("a").dt.replace(year="year"),
        us=pl.col("a").dt.replace(microsecond="microsecond"),
    )

    expected = pl.DataFrame(
        {
            "year": pl.Series(
                [
                    "2021-01-01T00:00:00.123456789",
                    "2020-01-01T00:00:00.123456789",
                ]
            ).cast(pl.Datetime("ns")),
            "us": pl.Series(
                [
                    "2020-01-01T00:00:00.000050",
                    "2020-01-01T00:00:00.123456789",
                ]
            ).cast(pl.Datetime("ns")),
        }
    )

    assert_frame_equal(result, expected)


@pytest.mark.parametrize("tu", ["ms", "us", "ns"])
@pytest.mark.parametrize("tzinfo", [None, "Africa/Nairobi", "America/New_York"])
def test_replace_preserve_tu_and_tz(tu: TimeUnit, tzinfo: str | None) -> None:
    """Check that ``dt.replace`` keeps the input's time unit and time zone.

    ``tzinfo`` is ``None`` for the time-zone-naive case.
    """
    s = pl.Series(
        [datetime(2024, 1, 1), datetime(2024, 1, 2)],
        dtype=pl.Datetime(time_unit=tu, time_zone=tzinfo),
    )
    result = s.dt.replace(year=2000)
    assert result.dtype.time_unit == tu  # type: ignore[attr-defined]
    assert result.dtype.time_zone == tzinfo  # type: ignore[attr-defined]


def test_replace_date_invalid_components() -> None:
    """Out-of-range month/day on a date column must raise ``ComputeError``."""
    df = pl.DataFrame({"a": [date(2025, 1, 1)]})

    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 13, 1\) supplied"
    ):
        df.select(pl.col("a").dt.replace(month=13))
    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 1, 32\) supplied"
    ):
        df.select(pl.col("a").dt.replace(day=32))


def test_replace_datetime_invalid_date_components() -> None:
    """Out-of-range month/day on a datetime column must raise ``ComputeError``."""
    df = pl.DataFrame({"a": [datetime(2025, 1, 1)]})

    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 13, 1\) supplied"
    ):
        df.select(pl.col("a").dt.replace(month=13))
    with pytest.raises(
        ComputeError, match=r"Invalid date components \(2025, 1, 32\) supplied"
    ):
        df.select(pl.col("a").dt.replace(day=32))


def test_replace_datetime_invalid_time_components() -> None:
    """Out-of-range time components on a datetime must raise ``ComputeError``."""
    df = pl.DataFrame({"a": [datetime(2025, 1, 1)]})

    # hour
    with pytest.raises(
        ComputeError, match=r"Invalid time components \(25, 0, 0, 0\) supplied"
    ):
        df.select(pl.col("a").dt.replace(hour=25))

    # minute
    with pytest.raises(
        ComputeError, match=r"Invalid time components \(0, 61, 0, 0\) supplied"
    ):
        df.select(pl.col("a").dt.replace(minute=61))

    # second
    with pytest.raises(
        ComputeError, match=r"Invalid time components \(0, 0, 61, 0\) supplied"
    ):
        df.select(pl.col("a").dt.replace(second=61))

    # microsecond
    # NOTE(review): the expected message shows 2000000000 for an input of
    # 2_000_000 microseconds -- presumably the value is reported in
    # nanoseconds; confirm against the implementation.
    with pytest.raises(
        ComputeError,
        match=r"Invalid time components \(0, 0, 0, 2000000000\) supplied",
    ):
        df.select(pl.col("a").dt.replace(microsecond=2_000_000))


def test_replace_unequal_length_22018() -> None:
    """A length-3 ``year`` series against a length-2 series raises ``ShapeError``."""
    with pytest.raises(pl.exceptions.ShapeError):
        pl.Series([datetime(2088, 8, 8, 8, 8, 8, 8)] * 2).dt.replace(
            year=pl.Series([2000, 2001, 2002])
        )


def test_replace_broadcast_self() -> None:
    """A single-element literal datetime is broadcast against 8-row columns.

    A null replacement value keeps that component of the literal datetime.
    """
    df = pl.DataFrame(
        {
            "year": [None, 2, 3, 4, 5, 6, 7, 8],
            "month": [1, None, 3, 4, 5, 6, 7, 8],
            "day": [1, 2, None, 4, 5, 6, 7, 8],
            "hour": [1, 2, 3, None, 5, 6, 7, 8],
            "minute": [1, 2, 3, 4, None, 6, 7, 8],
            "second": [1, 2, 3, 4, 5, None, 7, 8],
            "microsecond": [1, 2, 3, 4, 5, 6, None, 8],
        }
    )

    result = df.select(
        pl.lit(pl.Series("dates", [datetime(2088, 8, 8, 8, 8, 8, 8)])).dt.replace(
            year="year",
            month="month",
            day="day",
            hour="hour",
            minute="minute",
            second="second",
            microsecond="microsecond",
        )
    )

    expected = pl.DataFrame(
        {
            "dates": [
                datetime(2088, 1, 1, 1, 1, 1, 1),
                datetime(2, 8, 2, 2, 2, 2, 2),
                datetime(3, 3, 8, 3, 3, 3, 3),
                datetime(4, 4, 4, 8, 4, 4, 4),
                datetime(5, 5, 5, 5, 8, 5, 5),
                datetime(6, 6, 6, 6, 6, 8, 6),
                datetime(7, 7, 7, 7, 7, 7, 8),
                datetime(8, 8, 8, 8, 8, 8, 8),
            ]
        }
    )

    assert_frame_equal(result, expected)