# Source path: blob/main/py-polars/tests/unit/operations/namespaces/test_strptime.py
"""1Module for testing `.str.strptime` of the string namespace.23This method gets its own module due to its complexity.4"""56from __future__ import annotations78from contextlib import nullcontext as does_not_raise9from datetime import date, datetime, time, timedelta, timezone10from typing import TYPE_CHECKING, Any11from zoneinfo import ZoneInfo1213import pytest1415import polars as pl16from polars.exceptions import ChronoFormatWarning, ComputeError, InvalidOperationError17from polars.testing import assert_frame_equal, assert_series_equal1819if TYPE_CHECKING:20from contextlib import AbstractContextManager2122from polars._typing import PolarsTemporalType, TimeUnit232425def test_str_strptime() -> None:26s = pl.Series(["2020-01-01", "2020-02-02"])27expected = pl.Series([date(2020, 1, 1), date(2020, 2, 2)])28assert_series_equal(s.str.strptime(pl.Date, "%Y-%m-%d"), expected)2930s = pl.Series(["2020-01-01 00:00:00", "2020-02-02 03:20:10"])31expected = pl.Series(32[datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 2, 2, 3, 20, 10)]33)34assert_series_equal(s.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S"), expected)3536s = pl.Series(["00:00:00", "03:20:10"])37expected = pl.Series([0, 12010000000000], dtype=pl.Time)38assert_series_equal(s.str.strptime(pl.Time, "%H:%M:%S"), expected)394041def test_date_parse_omit_day() -> None:42df = pl.DataFrame({"month": ["2022-01"]})43assert df.select(pl.col("month").str.to_date(format="%Y-%m")).item() == date(442022, 1, 145)46assert df.select(47pl.col("month").str.to_datetime(format="%Y-%m")48).item() == datetime(2022, 1, 1)495051def test_to_datetime_precision() -> None:52s = pl.Series(53"date", ["2022-09-12 21:54:36.789321456", "2022-09-13 12:34:56.987456321"]54)55ds = s.str.to_datetime()56assert ds.cast(pl.Date).is_not_null().all()57assert getattr(ds.dtype, "time_unit", None) == "us"5859time_units: list[TimeUnit] = ["ms", "us", "ns"]60suffixes = ["%.3f", "%.6f", "%.9f"]61contexts: list[AbstractContextManager[Any]] = 
[62pytest.raises(InvalidOperationError),63pytest.raises(InvalidOperationError),64does_not_raise(),65]66test_data = zip(67time_units,68suffixes,69(70[789000000, 987000000],71[789321000, 987456000],72[789321456, 987456321],73),74contexts,75strict=False,76)77for time_unit, suffix, expected_values, context in test_data:78with context:79s.str.to_datetime(f"%Y-%m-%d %H:%M:%S{suffix}", time_unit=time_unit)80ds = s.str.to_datetime("%Y-%m-%d %H:%M:%S%.f", time_unit=time_unit)81assert getattr(ds.dtype, "time_unit", None) == time_unit82assert ds.dt.nanosecond().to_list() == expected_values838485@pytest.mark.parametrize(86("time_unit", "expected"),87[("ms", "123000000"), ("us", "123456000"), ("ns", "123456789")],88)89@pytest.mark.parametrize("format", ["%Y-%m-%d %H:%M:%S%.f", None])90def test_to_datetime_precision_with_time_unit(91time_unit: TimeUnit, expected: str, format: str92) -> None:93s = pl.Series(["2020-01-01 00:00:00.123456789"])94result = s.str.to_datetime(format, time_unit=time_unit).dt.to_string("%f")[0]95assert result == expected969798@pytest.mark.parametrize(99("tz_string", "timedelta"),100[("+01:00", timedelta(minutes=60)), ("-01:30", timedelta(hours=-1, minutes=-30))],101)102def test_timezone_aware_strptime(tz_string: str, timedelta: timedelta) -> None:103times = pl.DataFrame(104{105"delivery_datetime": [106"2021-12-05 06:00:00" + tz_string,107"2021-12-05 07:00:00" + tz_string,108"2021-12-05 08:00:00" + tz_string,109]110}111)112assert times.with_columns(113pl.col("delivery_datetime").str.to_datetime(format="%Y-%m-%d %H:%M:%S%z")114).to_dict(as_series=False) == {115"delivery_datetime": [116datetime(2021, 12, 5, 6, 0, tzinfo=timezone(timedelta)),117datetime(2021, 12, 5, 7, 0, tzinfo=timezone(timedelta)),118datetime(2021, 12, 5, 8, 0, tzinfo=timezone(timedelta)),119]120}121122123def test_to_date_non_exact_strptime() -> None:124s = pl.Series("a", ["2022-01-16", "2022-01-17", "foo2022-01-18", "b2022-01-19ar"])125format = "%Y-%m-%d"126127result = 
s.str.to_date(format, strict=False, exact=True)128expected = pl.Series("a", [date(2022, 1, 16), date(2022, 1, 17), None, None])129assert_series_equal(result, expected)130131result = s.str.to_date(format, strict=False, exact=False)132expected = pl.Series(133"a",134[date(2022, 1, 16), date(2022, 1, 17), date(2022, 1, 18), date(2022, 1, 19)],135)136assert_series_equal(result, expected)137138with pytest.raises(InvalidOperationError):139s.str.to_date(format, strict=True, exact=True)140141142@pytest.mark.parametrize(143("time_string", "expected"),144[145("01-02-2024", date(2024, 2, 1)),146("01.02.2024", date(2024, 2, 1)),147("01/02/2024", date(2024, 2, 1)),148("2024-02-01", date(2024, 2, 1)),149("2024/02/01", date(2024, 2, 1)),150("31-12-2024", date(2024, 12, 31)),151("31.12.2024", date(2024, 12, 31)),152("31/12/2024", date(2024, 12, 31)),153("2024-12-31", date(2024, 12, 31)),154("2024/12/31", date(2024, 12, 31)),155],156)157def test_to_date_all_inferred_date_patterns(time_string: str, expected: date) -> None:158result = pl.Series([time_string]).str.to_date()159assert result[0] == expected160161162@pytest.mark.parametrize(163("time_string", "expected"),164[165("2024-12-04 09:08:00", datetime(2024, 12, 4, 9, 8, 0)),166("2024-12-4 9:8:0", datetime(2024, 12, 4, 9, 8, 0)),167("2024/12/04 9:8", datetime(2024, 12, 4, 9, 8, 0)),168("4/12/2024 9:8", datetime(2024, 12, 4, 9, 8, 0)),169],170)171def test_to_datetime_infer_missing_digit_in_time_16092(172time_string: str, expected: datetime173) -> None:174result = pl.Series([time_string]).str.to_datetime()175assert result[0] == expected176177178@pytest.mark.parametrize(179("value", "attr"),180[181("a", "to_date"),182("ab", "to_date"),183("a", "to_datetime"),184("ab", "to_datetime"),185],186)187def test_non_exact_short_elements_10223(value: str, attr: str) -> None:188with pytest.raises((InvalidOperationError, ComputeError)):189getattr(pl.Series(["2019-01-01", value]).str, 
attr)(exact=False)190191192@pytest.mark.parametrize(193("offset", "time_zone", "tzinfo", "format"),194[195("+01:00", "UTC", timezone(timedelta(hours=1)), "%Y-%m-%dT%H:%M%z"),196("", None, None, "%Y-%m-%dT%H:%M"),197],198)199def test_to_datetime_non_exact_strptime(200offset: str, time_zone: str | None, tzinfo: timezone | None, format: str201) -> None:202s = pl.Series(203"a",204[205f"2022-01-16T00:00{offset}",206f"2022-01-17T00:00{offset}",207f"foo2022-01-18T00:00{offset}",208f"b2022-01-19T00:00{offset}ar",209],210)211212result = s.str.to_datetime(format, strict=False, exact=True)213expected = pl.Series(214"a",215[216datetime(2022, 1, 16, tzinfo=tzinfo),217datetime(2022, 1, 17, tzinfo=tzinfo),218None,219None,220],221)222assert_series_equal(result, expected)223assert result.dtype == pl.Datetime("us", time_zone)224225result = s.str.to_datetime(format, strict=False, exact=False)226expected = pl.Series(227"a",228[229datetime(2022, 1, 16, tzinfo=tzinfo),230datetime(2022, 1, 17, tzinfo=tzinfo),231datetime(2022, 1, 18, tzinfo=tzinfo),232datetime(2022, 1, 19, tzinfo=tzinfo),233],234)235assert_series_equal(result, expected)236assert result.dtype == pl.Datetime("us", time_zone)237238with pytest.raises(InvalidOperationError):239s.str.to_datetime(format, strict=True, exact=True)240241242def test_to_datetime_dates_datetimes() -> None:243s = pl.Series("date", ["2021-04-22", "2022-01-04 00:00:00"])244assert s.str.to_datetime().to_list() == [245datetime(2021, 4, 22, 0, 0),246datetime(2022, 1, 4, 0, 0),247]248249250@pytest.mark.parametrize(251("time_string", "expected"),252[253("09-05-2019", datetime(2019, 5, 9)),254("2018-09-05", datetime(2018, 9, 5)),255("2018-09-05T04:05:01", datetime(2018, 9, 5, 4, 5, 1)),256("2018-09-05T04:24:01.9", datetime(2018, 9, 5, 4, 24, 1, 900000)),257("2018-09-05T04:24:02.11", datetime(2018, 9, 5, 4, 24, 2, 110000)),258("2018-09-05T14:24:02.123", datetime(2018, 9, 5, 14, 24, 2, 123000)),259("2019-04-18T02:45:55.555000000", datetime(2019, 4, 18, 2, 45, 
55, 555000)),260("2019-04-18T22:45:55.555123", datetime(2019, 4, 18, 22, 45, 55, 555123)),261(262"2018-09-05T04:05:01+01:00",263datetime(2018, 9, 5, 4, 5, 1, tzinfo=timezone(timedelta(hours=1))),264),265(266"2018-09-05T04:24:01.9+01:00",267datetime(2018, 9, 5, 4, 24, 1, 900000, tzinfo=timezone(timedelta(hours=1))),268),269(270"2018-09-05T04:24:02.11+01:00",271datetime(2018, 9, 5, 4, 24, 2, 110000, tzinfo=timezone(timedelta(hours=1))),272),273(274"2018-09-05T14:24:02.123+01:00",275datetime(2762018, 9, 5, 14, 24, 2, 123000, tzinfo=timezone(timedelta(hours=1))277),278),279(280"2019-04-18T02:45:55.555000000+01:00",281datetime(2822019, 4, 18, 2, 45, 55, 555000, tzinfo=timezone(timedelta(hours=1))283),284),285(286"2019-04-18T22:45:55.555123+01:00",287datetime(2882019, 4, 18, 22, 45, 55, 555123, tzinfo=timezone(timedelta(hours=1))289),290),291(292"20190418T224555.555123+01:00",293datetime(2942019, 4, 18, 22, 45, 55, 555123, tzinfo=timezone(timedelta(hours=1))295),296),297(298"20190418T224555.555123Z",299datetime(2019, 4, 18, 22, 45, 55, 555123, tzinfo=timezone.utc),300),301],302)303def test_to_datetime_patterns_single(time_string: str, expected: str) -> None:304result = pl.Series([time_string]).str.to_datetime().item()305assert result == expected306307308@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])309def test_infer_tz_aware_time_unit(time_unit: TimeUnit) -> None:310result = pl.Series(["2020-01-02T04:00:00+02:00"]).str.to_datetime(311time_unit=time_unit312)313assert result.dtype == pl.Datetime(time_unit, "UTC")314assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc)315316317@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])318def test_infer_tz_aware_with_utc(time_unit: TimeUnit) -> None:319result = pl.Series(["2020-01-02T04:00:00+02:00"]).str.to_datetime(320time_unit=time_unit321)322assert result.dtype == pl.Datetime(time_unit, "UTC")323assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc)324325326def 
test_str_to_datetime_infer_tz_aware() -> None:327result = (328pl.Series(["2020-01-02T04:00:00+02:00"])329.str.to_datetime(time_unit="us", time_zone="Europe/Vienna")330.item()331)332assert result == datetime(2020, 1, 2, 3, tzinfo=ZoneInfo("Europe/Vienna"))333334335@pytest.mark.parametrize(336"result",337[338pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(339pl.Datetime("us", "UTC"), format="%Y-%m-%dT%H:%M:%S%z"340),341pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(342pl.Datetime("us"), format="%Y-%m-%dT%H:%M:%S%z"343),344pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(pl.Datetime("us", "UTC")),345pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(pl.Datetime("us")),346pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(347time_zone="UTC", format="%Y-%m-%dT%H:%M:%S%z"348),349pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(350format="%Y-%m-%dT%H:%M:%S%z"351),352pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(time_zone="UTC"),353pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(),354],355)356def test_parsing_offset_aware_with_utc_dtype(result: pl.Series) -> None:357expected = pl.Series([datetime(2020, 1, 1, tzinfo=timezone.utc)])358assert_series_equal(result, expected)359360361def test_datetime_strptime_patterns_consistent() -> None:362# note that all should be year first363df = pl.Series(364"date",365[366"2018-09-05",367"2018-09-05T04:05:01",368"2018-09-05T04:24:01.9",369"2018-09-05T04:24:02.11",370"2018-09-05T14:24:02.123",371"2018-09-05T14:24:02.123Z",372"2019-04-18T02:45:55.555000000",373"2019-04-18T22:45:55.555123",374],375).to_frame()376s = df.with_columns(377pl.col("date").str.to_datetime(strict=False).alias("parsed"),378)["parsed"]379assert s.null_count() == 1380assert s[5] is None381382383def test_datetime_strptime_patterns_inconsistent() -> None:384# note that the pattern is inferred from the first element to385# be DatetimeDMY, and so the others (correctly) parse as `null`.386df = 
pl.Series(387"date",388[389"09-05-2019",390"2018-09-05",391"2018-09-05T04:05:01",392"2018-09-05T04:24:01.9",393"2018-09-05T04:24:02.11",394"2018-09-05T14:24:02.123",395"2018-09-05T14:24:02.123Z",396"2019-04-18T02:45:55.555000000",397"2019-04-18T22:45:55.555123",398],399).to_frame()400s = df.with_columns(pl.col("date").str.to_datetime(strict=False).alias("parsed"))[401"parsed"402]403assert s.null_count() == 8404assert s[0] is not None405406407@pytest.mark.parametrize(408(409"ts",410"format",411"exp_year",412"exp_month",413"exp_day",414"exp_hour",415"exp_minute",416"exp_second",417),418[419("-0031-04-24 22:13:20", "%Y-%m-%d %H:%M:%S", -31, 4, 24, 22, 13, 20),420("-0031-04-24", "%Y-%m-%d", -31, 4, 24, 0, 0, 0),421],422)423def test_parse_negative_dates(424ts: str,425format: str,426exp_year: int,427exp_month: int,428exp_day: int,429exp_hour: int,430exp_minute: int,431exp_second: int,432) -> None:433s = pl.Series([ts])434result = s.str.to_datetime(format, time_unit="ms")435# Python datetime.datetime doesn't support negative dates, so comparing436# with `result.item()` directly won't work.437assert result.dt.year().item() == exp_year438assert result.dt.month().item() == exp_month439assert result.dt.day().item() == exp_day440assert result.dt.hour().item() == exp_hour441assert result.dt.minute().item() == exp_minute442assert result.dt.second().item() == exp_second443444445def test_short_formats() -> None:446s = pl.Series(["20202020", "2020"])447assert s.str.to_date("%Y", strict=False).to_list() == [448None,449date(2020, 1, 1),450]451assert s.str.to_date("%bar", strict=False).to_list() == [None, None]452453454@pytest.mark.parametrize(455("time_string", "fmt", "datatype", "expected"),456[457("Jul/2020", "%b/%Y", pl.Date, date(2020, 7, 1)),458("Jan/2020", "%b/%Y", pl.Date, date(2020, 1, 1)),459("02/Apr/2020", "%d/%b/%Y", pl.Date, date(2020, 4, 2)),460("Dec/2020", "%b/%Y", pl.Datetime, datetime(2020, 12, 1, 0, 0)),461("Nov/2020", "%b/%Y", pl.Datetime, datetime(2020, 11, 1, 0, 
0)),462("02/Feb/2020", "%d/%b/%Y", pl.Datetime, datetime(2020, 2, 2, 0, 0)),463],464)465def test_strptime_abbrev_month(466time_string: str, fmt: str, datatype: PolarsTemporalType, expected: date467) -> None:468s = pl.Series([time_string])469result = s.str.strptime(datatype, fmt).item()470assert result == expected471472473def test_full_month_name() -> None:474s = pl.Series(["2022-December-01"]).str.to_datetime("%Y-%B-%d")475assert s[0] == datetime(2022, 12, 1)476477478@pytest.mark.parametrize(479("datatype", "expected"),480[481(pl.Datetime, datetime(2022, 1, 1)),482(pl.Date, date(2022, 1, 1)),483],484)485def test_single_digit_month(486datatype: PolarsTemporalType, expected: datetime | date487) -> None:488s = pl.Series(["2022-1-1"]).str.strptime(datatype, "%Y-%m-%d")489assert s[0] == expected490491492def test_invalid_date_parsing_4898() -> None:493assert pl.Series(["2022-09-18", "2022-09-50"]).str.to_date(494"%Y-%m-%d", strict=False495).to_list() == [date(2022, 9, 18), None]496497498def test_strptime_invalid_timezone() -> None:499ts = pl.Series(["2020-01-01 00:00:00+01:00"]).str.to_datetime("%Y-%m-%d %H:%M:%S%z")500with pytest.raises(ComputeError, match=r"unable to parse time zone: 'foo'"):501ts.dt.replace_time_zone("foo")502503504def test_to_datetime_ambiguous_or_non_existent() -> None:505with pytest.raises(506ComputeError,507match="datetime '2021-11-07 01:00:00' is ambiguous in time zone 'America/Chicago'",508):509pl.Series(["2021-11-07 01:00"]).str.to_datetime(510time_unit="us", time_zone="America/Chicago"511)512with pytest.raises(513ComputeError,514match="datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Warsaw'",515):516pl.Series(["2021-03-28 02:30"]).str.to_datetime(517time_unit="us", time_zone="Europe/Warsaw"518)519with pytest.raises(520ComputeError,521match="datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Warsaw'",522):523pl.Series(["2021-03-28 
02:30"]).str.to_datetime(524time_unit="us",525time_zone="Europe/Warsaw",526ambiguous="null",527)528with pytest.raises(529ComputeError,530match="datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Warsaw'",531):532pl.Series(["2021-03-28 02:30"] * 2).str.to_datetime(533time_unit="us",534time_zone="Europe/Warsaw",535ambiguous=pl.Series(["null", "null"]),536)537538539@pytest.mark.parametrize(540("ts", "fmt", "expected"),541[542("2020-01-01T00:00:00Z", None, datetime(2020, 1, 1, tzinfo=timezone.utc)),543("2020-01-01T00:00:00Z", "%+", datetime(2020, 1, 1, tzinfo=timezone.utc)),544(545"2020-01-01T00:00:00+01:00",546"%Y-%m-%dT%H:%M:%S%z",547datetime(2020, 1, 1, tzinfo=timezone(timedelta(seconds=3600))),548),549(550"2020-01-01T00:00:00+01:00",551"%Y-%m-%dT%H:%M:%S%:z",552datetime(2020, 1, 1, tzinfo=timezone(timedelta(seconds=3600))),553),554(555"2020-01-01T00:00:00+01:00",556"%Y-%m-%dT%H:%M:%S%#z",557datetime(2020, 1, 1, tzinfo=timezone(timedelta(seconds=3600))),558),559],560)561def test_to_datetime_tz_aware_strptime(ts: str, fmt: str, expected: datetime) -> None:562result = pl.Series([ts]).str.to_datetime(fmt).item()563assert result == expected564565566@pytest.mark.parametrize("format", ["%+", "%Y-%m-%dT%H:%M:%S%z"])567def test_crossing_dst(format: str) -> None:568ts = ["2021-03-27T23:59:59+01:00", "2021-03-28T23:59:59+02:00"]569result = pl.Series(ts).str.to_datetime(format)570assert result[0] == datetime(2021, 3, 27, 22, 59, 59, tzinfo=ZoneInfo("UTC"))571assert result[1] == datetime(2021, 3, 28, 21, 59, 59, tzinfo=ZoneInfo("UTC"))572573574@pytest.mark.parametrize("format", ["%+", "%Y-%m-%dT%H:%M:%S%z"])575def test_crossing_dst_tz_aware(format: str) -> None:576ts = ["2021-03-27T23:59:59+01:00", "2021-03-28T23:59:59+02:00"]577result = pl.Series(ts).str.to_datetime(format)578expected = pl.Series(579[580datetime(2021, 3, 27, 22, 59, 59, tzinfo=timezone.utc),581datetime(2021, 3, 28, 21, 59, 59, tzinfo=timezone.utc),582]583)584assert_series_equal(result, 
expected)585586587@pytest.mark.parametrize(588("data", "format", "expected"),589[590(591"2023-02-05T05:10:10.074000",592"%Y-%m-%dT%H:%M:%S%.f",593datetime(2023, 2, 5, 5, 10, 10, 74000),594),595],596)597def test_strptime_subseconds_datetime(data: str, format: str, expected: time) -> None:598s = pl.Series([data])599result = s.str.to_datetime(format).item()600assert result == expected601602603@pytest.mark.parametrize(604("string", "fmt"),605[606pytest.param("2023-05-04|7", "%Y-%m-%d|%H", id="hour but no minute"),607pytest.param("2023-05-04|7", "%Y-%m-%d|%k", id="padded hour but no minute"),608pytest.param("2023-05-04|10", "%Y-%m-%d|%M", id="minute but no hour"),609pytest.param("2023-05-04|10", "%Y-%m-%d|%S", id="second but no hour"),610pytest.param(611"2000-Jan-01 01 00 01", "%Y-%b-%d %I %M %S", id="12-hour clock but no AM/PM"612),613pytest.param(614"2000-Jan-01 01 00 01",615"%Y-%b-%d %l %M %S",616id="padded 12-hour clock but no AM/PM",617),618],619)620def test_strptime_incomplete_formats(string: str, fmt: str) -> None:621with pytest.raises(622ComputeError,623match="Invalid format string",624):625pl.Series([string]).str.to_datetime(fmt)626627628@pytest.mark.parametrize(629("string", "fmt", "expected"),630[631("2023-05-04|7:3", "%Y-%m-%d|%H:%M", datetime(2023, 5, 4, 7, 3)),632("2023-05-04|10:03", "%Y-%m-%d|%H:%M", datetime(2023, 5, 4, 10, 3)),633(634"2000-Jan-01 01 00 01 am",635"%Y-%b-%d %I %M %S %P",636datetime(2000, 1, 1, 1, 0, 1),637),638(639"2000-Jan-01 01 00 01 am",640"%Y-%b-%d %_I %M %S %P",641datetime(2000, 1, 1, 1, 0, 1),642),643(644"2000-Jan-01 01 00 01 am",645"%Y-%b-%d %l %M %S %P",646datetime(2000, 1, 1, 1, 0, 1),647),648(649"2000-Jan-01 01 00 01 AM",650"%Y-%b-%d %I %M %S %p",651datetime(2000, 1, 1, 1, 0, 1),652),653(654"2000-Jan-01 01 00 01 AM",655"%Y-%b-%d %_I %M %S %p",656datetime(2000, 1, 1, 1, 0, 1),657),658(659"2000-Jan-01 01 00 01 AM",660"%Y-%b-%d %l %M %S %p",661datetime(2000, 1, 1, 1, 0, 1),662),663],664)665def test_strptime_complete_formats(string: 
str, fmt: str, expected: datetime) -> None:666# Similar to the above, but these formats are complete and should work667result = pl.Series([string]).str.to_datetime(fmt).item()668assert result == expected669670671@pytest.mark.parametrize(672("data", "format", "expected"),673[674("00:00:00.000005000", "%H:%M:%S%.f", time(0, 0, 0, 5)),675("01:23:10.000500", "%H:%M:%S%.6f", time(1, 23, 10, 500)),676("08:10:11.000", "%H:%M:%S%.3f", time(8, 10, 11)),677("15:50:25", "%T", time(15, 50, 25)),678("22:35", "%R", time(22, 35)),679],680)681def test_to_time_inferred(data: str, format: str, expected: time) -> None:682df = pl.DataFrame({"tmstr": [data]})683expected_df = df.with_columns(tm=pl.Series("tm", values=[expected]))684for fmt in (format, None):685res = df.with_columns(tm=pl.col("tmstr").str.to_time(fmt))686assert_frame_equal(res, expected_df)687688689@pytest.mark.parametrize(690("data", "format", "expected"),691[692("05:10:11.740000", "%H:%M:%S%.f", time(5, 10, 11, 740000)),693("13:20:12.000074", "%T%.6f", time(13, 20, 12, 74)),694("21:30:13.007", "%H:%M:%S%.3f", time(21, 30, 13, 7000)),695],696)697def test_to_time_subseconds(data: str, format: str, expected: time) -> None:698s = pl.Series([data])699for res in (700s.str.to_time().item(),701s.str.to_time(format).item(),702):703assert res == expected704705706def test_to_time_format_warning() -> None:707s = pl.Series(["05:10:10.074000"])708with pytest.warns(ChronoFormatWarning, match=r".%f"):709result = s.str.to_time("%H:%M:%S.%f").item()710assert result == time(5, 10, 10, 74)711712713@pytest.mark.parametrize("exact", [True, False])714def test_to_datetime_ambiguous_earliest(exact: bool) -> None:715result = (716pl.Series(["2020-10-25 01:00"])717.str.to_datetime(time_zone="Europe/London", ambiguous="earliest", exact=exact)718.item()719)720expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London"))721assert result == expected722result = (723pl.Series(["2020-10-25 
01:00"])724.str.to_datetime(time_zone="Europe/London", ambiguous="latest", exact=exact)725.item()726)727expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London"))728assert result == expected729with pytest.raises(ComputeError):730pl.Series(["2020-10-25 01:00"]).str.to_datetime(731time_zone="Europe/London",732exact=exact,733).item()734735736def test_to_datetime_naive_format_and_time_zone() -> None:737# format-specified path738result = pl.Series(["2020-01-01"]).str.to_datetime(739format="%Y-%m-%d", time_zone="Asia/Kathmandu"740)741expected = pl.Series([datetime(2020, 1, 1)]).dt.replace_time_zone("Asia/Kathmandu")742assert_series_equal(result, expected)743# format-inferred path744result = pl.Series(["2020-01-01"]).str.to_datetime(time_zone="Asia/Kathmandu")745assert_series_equal(result, expected)746747748@pytest.mark.parametrize("exact", [True, False])749def test_strptime_ambiguous_earliest(exact: bool) -> None:750result = (751pl.Series(["2020-10-25 01:00"])752.str.strptime(753pl.Datetime("us", "Europe/London"), ambiguous="earliest", exact=exact754)755.item()756)757expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London"))758assert result == expected759result = (760pl.Series(["2020-10-25 01:00"])761.str.strptime(762pl.Datetime("us", "Europe/London"), ambiguous="latest", exact=exact763)764.item()765)766expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London"))767assert result == expected768with pytest.raises(ComputeError):769pl.Series(["2020-10-25 01:00"]).str.strptime(770pl.Datetime("us", "Europe/London"),771exact=exact,772).item()773774775@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])776def test_to_datetime_out_of_range_13401(time_unit: TimeUnit) -> None:777s = pl.Series(["2020-January-01 12:34:66"])778with pytest.raises(InvalidOperationError, match=r"conversion .* failed"):779s.str.to_datetime("%Y-%B-%d %H:%M:%S", time_unit=time_unit)780assert (781s.str.to_datetime("%Y-%B-%d %H:%M:%S", 
strict=False, time_unit=time_unit).item()782is None783)784785786def test_out_of_ns_range_no_tu_specified_13592() -> None:787df = pl.DataFrame({"dates": ["2022-08-31 00:00:00.0", "0920-09-18 00:00:00.0"]})788result = df.select(pl.col("dates").str.to_datetime(format="%Y-%m-%d %H:%M:%S%.f"))[789"dates"790]791expected = pl.Series(792"dates",793[datetime(2022, 8, 31, 0, 0), datetime(920, 9, 18, 0, 0)],794dtype=pl.Datetime("us"),795)796assert_series_equal(result, expected)797798799def test_wrong_format_percent() -> None:800with pytest.raises(InvalidOperationError):801pl.Series(["2019-01-01"]).str.strptime(pl.Date, format="d%")802803804def test_polars_parser_fooled_by_trailing_nonsense_22167() -> None:805with pytest.raises(InvalidOperationError):806pl.Series(["2025-04-06T18:57:42.77756192Z"]).str.to_datetime(807"%Y-%m-%dT%H:%M:%S.%9fcabbagebananapotato"808)809with pytest.raises(InvalidOperationError):810pl.Series(["2025-04-06T18:57:42.77756192Z"]).str.to_datetime(811"%Y-%m-%dT%H:%M:%S.%9f#z"812)813with pytest.raises(InvalidOperationError):814pl.Series(["2025-04-06T18:57:42.77Z"]).str.to_datetime(815"%Y-%m-%dT%H:%M:%S.%3f#z"816)817with pytest.raises(InvalidOperationError):818pl.Series(["2025-04-06T18:57:42.77123Z"]).str.to_datetime(819"%Y-%m-%dT%H:%M:%S.%6f#z"820)821822823def test_strptime_empty_input_22214() -> None:824s = pl.Series("x", [], pl.String)825826assert s.str.strptime(pl.Time, "%H:%M:%S%.f").is_empty()827assert s.str.strptime(pl.Date, "%Y-%m-%d").is_empty()828assert s.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M%#z").is_empty()829830831@pytest.mark.parametrize(832"value",833[834"31/12/2022",835"banana",836"12-345-678",837"12-345-67",838"12-345-6789",839"123*45*678",840"123x45x678",841"123x45x678x",842],843)844def test_matching_strings_but_different_format_22495(value: str) -> None:845s = pl.Series("my_strings", [value])846result = s.str.to_date("%Y-%m-%d", strict=False).item()847assert result is None848849850def test_date_parse_omit_day_month() -> None:851fmt_B = 
"%Y %B"852fmt_b = "%Y %b"853df = (854pl.select(date=pl.date_range(pl.date(2022, 1, 1), pl.date(2022, 12, 1), "1mo"))855.with_columns(856strdateB=pl.col("date").dt.strftime(fmt_B),857strdateb=pl.col("date").dt.strftime(fmt_b),858)859.with_columns(860round_tripB=pl.col("strdateB").str.strptime(pl.Date, fmt_B),861round_tripb=pl.col("strdateb").str.strptime(pl.Date, fmt_b),862)863)864check = df.filter(865~pl.all_horizontal(866pl.col("date") == pl.col("round_tripB"),867pl.col("date") == pl.col("round_tripb"),868)869)870assert check.height == 0871872s = pl.Series(873[874"2022 January",875"2022 February",876"2022 March",877"2022 April",878"2022 May",879"2022 June",880"2022 July",881"2022 August",882"2022 September",883"2022 October",884"2022 November",885"2022 December",886]887)888result = s.str.strptime(pl.Date, "%Y %B")889expected = pl.Series(890[891date(2022, 1, 1),892date(2022, 2, 1),893date(2022, 3, 1),894date(2022, 4, 1),895date(2022, 5, 1),896date(2022, 6, 1),897date(2022, 7, 1),898date(2022, 8, 1),899date(2022, 9, 1),900date(2022, 10, 1),901date(2022, 11, 1),902date(2022, 12, 1),903]904)905assert_series_equal(result, expected)906907908@pytest.mark.parametrize("length", [1, 5])909def test_eager_inference_on_expr(length: int) -> None:910s = pl.Series("a", ["2025-04-06T18:57:42.77123Z"] * length)911912assert_series_equal(913s.str.strptime(pl.Datetime),914pl.Series(915"a",916[917datetime(9182025, 4, 6, 18, 57, 42, 771230, tzinfo=timezone(timedelta(hours=0))919)920]921* length,922),923)924925with pytest.raises(926ComputeError,927match="`strptime` / `to_datetime` was called with no format and no time zone, but a time zone is part of the data",928):929s.to_frame().select(pl.col("a").str.strptime(pl.Datetime))930931932@pytest.mark.parametrize("maintain_order", [False, True])933def test_strptime_in_group_by(maintain_order: bool) -> None:934df = pl.DataFrame({"g": [1, 2], "a": ["AAA", "2025-01-01"]})935936assert_frame_equal(937df.group_by("g", 
maintain_order=maintain_order).agg(938pl.col.a.filter(pl.col.a != "AAA").str.to_date("%Y-%m-%d").min()939),940pl.DataFrame({"g": [1, 2], "a": [None, "2025-01-01"]}).with_columns(941pl.col.a.str.to_date("%Y-%m-%d")942),943check_row_order=maintain_order,944)945946947