# Path: blob/main/py-polars/tests/unit/operations/namespaces/test_strptime.py
"""
Module for testing `.str.strptime` of the string namespace.

This method gets its own module due to its complexity.
"""

from __future__ import annotations

from datetime import date, datetime, time, timedelta, timezone
from typing import TYPE_CHECKING
from zoneinfo import ZoneInfo

import pytest

import polars as pl
from polars.exceptions import ChronoFormatWarning, ComputeError, InvalidOperationError
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
    from polars._typing import PolarsTemporalType, TimeUnit


def test_str_strptime() -> None:
    """Parse strings into Date, Datetime and Time using explicit formats."""
    s = pl.Series(["2020-01-01", "2020-02-02"])
    expected = pl.Series([date(2020, 1, 1), date(2020, 2, 2)])
    assert_series_equal(s.str.strptime(pl.Date, "%Y-%m-%d"), expected)

    s = pl.Series(["2020-01-01 00:00:00", "2020-02-02 03:20:10"])
    expected = pl.Series(
        [datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 2, 2, 3, 20, 10)]
    )
    assert_series_equal(s.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S"), expected)

    s = pl.Series(["00:00:00", "03:20:10"])
    # 12010000000000 is 03:20:10 expressed as an integer for the Time dtype.
    expected = pl.Series([0, 12010000000000], dtype=pl.Time)
    assert_series_equal(s.str.strptime(pl.Time, "%H:%M:%S"), expected)


def test_date_parse_omit_day() -> None:
    """A `%Y-%m` format yields the first day of the month for date and datetime."""
    df = pl.DataFrame({"month": ["2022-01"]})
    assert df.select(pl.col("month").str.to_date(format="%Y-%m")).item() == date(
        2022, 1, 1
    )
    assert df.select(
        pl.col("month").str.to_datetime(format="%Y-%m")
    ).item() == datetime(2022, 1, 1)


def test_to_datetime_precision() -> None:
    """Check the inferred time unit and the sub-second values kept per time unit."""
    s = pl.Series(
        "date", ["2022-09-12 21:54:36.789321456", "2022-09-13 12:34:56.987456321"]
    )
    ds = s.str.to_datetime()
    assert ds.cast(pl.Date).is_not_null().all()
    assert getattr(ds.dtype, "time_unit", None) == "us"

    time_units: list[TimeUnit] = ["ms", "us", "ns"]
    suffixes = ["%.3f", "%.6f", "%.9f"]
    test_data = zip(
        time_units,
        suffixes,
        (
            [789000000, 987000000],
            [789321000, 987456000],
            [789321456, 987456321],
        ),
    )
    for time_unit, suffix, expected_values in test_data:
        ds = s.str.to_datetime(f"%Y-%m-%d %H:%M:%S{suffix}", time_unit=time_unit)
        assert getattr(ds.dtype, "time_unit", None) == time_unit
        assert ds.dt.nanosecond().to_list() == expected_values


@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [("ms", "123000000"), ("us", "123456000"), ("ns", "123456789")],
)
@pytest.mark.parametrize("format", ["%Y-%m-%d %H:%M:%S%.f", None])
def test_to_datetime_precision_with_time_unit(
    time_unit: TimeUnit, expected: str, format: str | None
) -> None:
    """Fractional digits are truncated to `time_unit`, with or without a format."""
    s = pl.Series(["2020-01-01 00:00:00.123456789"])
    result = s.str.to_datetime(format, time_unit=time_unit).dt.to_string("%f")[0]
    assert result == expected


@pytest.mark.parametrize(
    ("tz_string", "timedelta"),
    [("+01:00", timedelta(minutes=60)), ("-01:30", timedelta(hours=-1, minutes=-30))],
)
def test_timezone_aware_strptime(tz_string: str, timedelta: timedelta) -> None:
    """Strings with a `%z` offset parse to the matching offset-aware datetimes."""
    # NOTE: the `timedelta` parameter shadows the imported class inside this test.
    times = pl.DataFrame(
        {
            "delivery_datetime": [
                "2021-12-05 06:00:00" + tz_string,
                "2021-12-05 07:00:00" + tz_string,
                "2021-12-05 08:00:00" + tz_string,
            ]
        }
    )
    assert times.with_columns(
        pl.col("delivery_datetime").str.to_datetime(format="%Y-%m-%d %H:%M:%S%z")
    ).to_dict(as_series=False) == {
        "delivery_datetime": [
            datetime(2021, 12, 5, 6, 0, tzinfo=timezone(timedelta)),
            datetime(2021, 12, 5, 7, 0, tzinfo=timezone(timedelta)),
            datetime(2021, 12, 5, 8, 0, tzinfo=timezone(timedelta)),
        ]
    }


def test_to_date_non_exact_strptime() -> None:
    """Compare `exact=True` and `exact=False` date parsing on padded strings."""
    s = pl.Series("a", ["2022-01-16", "2022-01-17", "foo2022-01-18", "b2022-01-19ar"])
    format = "%Y-%m-%d"

    result = s.str.to_date(format, strict=False, exact=True)
    expected = pl.Series("a", [date(2022, 1, 16), date(2022, 1, 17), None, None])
    assert_series_equal(result, expected)

    result = s.str.to_date(format, strict=False, exact=False)
    expected = pl.Series(
        "a",
        [date(2022, 1, 16), date(2022, 1, 17), date(2022, 1, 18), date(2022, 1, 19)],
    )
    assert_series_equal(result, expected)

    with pytest.raises(InvalidOperationError):
        s.str.to_date(format, strict=True, exact=True)


@pytest.mark.parametrize(
    ("time_string", "expected"),
    [
        ("01-02-2024", date(2024, 2, 1)),
        ("01.02.2024", date(2024, 2, 1)),
        ("01/02/2024", date(2024, 2, 1)),
        ("2024-02-01", date(2024, 2, 1)),
        ("2024/02/01", date(2024, 2, 1)),
        ("31-12-2024", date(2024, 12, 31)),
        ("31.12.2024", date(2024, 12, 31)),
        ("31/12/2024", date(2024, 12, 31)),
        ("2024-12-31", date(2024, 12, 31)),
        ("2024/12/31", date(2024, 12, 31)),
    ],
)
def test_to_date_all_inferred_date_patterns(time_string: str, expected: date) -> None:
    """`to_date` without a format parses each listed date layout."""
    result = pl.Series([time_string]).str.to_date()
    assert result[0] == expected


@pytest.mark.parametrize(
    ("time_string", "expected"),
    [
        ("2024-12-04 09:08:00", datetime(2024, 12, 4, 9, 8, 0)),
        ("2024-12-4 9:8:0", datetime(2024, 12, 4, 9, 8, 0)),
        ("2024/12/04 9:8", datetime(2024, 12, 4, 9, 8, 0)),
        ("4/12/2024 9:8", datetime(2024, 12, 4, 9, 8, 0)),
    ],
)
def test_to_datetime_infer_missing_digit_in_time_16092(
    time_string: str, expected: datetime
) -> None:
    """`to_datetime` without a format accepts non-zero-padded components."""
    result = pl.Series([time_string]).str.to_datetime()
    assert result[0] == expected


@pytest.mark.parametrize(
    ("value", "attr"),
    [
        ("a", "to_date"),
        ("ab", "to_date"),
        ("a", "to_datetime"),
        ("ab", "to_datetime"),
    ],
)
def test_non_exact_short_elements_10223(value: str, attr: str) -> None:
    """Very short unparsable elements raise under `exact=False`."""
    with pytest.raises((InvalidOperationError, ComputeError)):
        getattr(pl.Series(["2019-01-01", value]).str, attr)(exact=False)


@pytest.mark.parametrize(
    ("offset", "time_zone", "tzinfo", "format"),
    [
        ("+01:00", "UTC", timezone(timedelta(hours=1)), "%Y-%m-%dT%H:%M%z"),
        ("", None, None, "%Y-%m-%dT%H:%M"),
    ],
)
def test_to_datetime_non_exact_strptime(
    offset: str, time_zone: str | None, tzinfo: timezone | None, format: str
) -> None:
    """Compare `exact=True` and `exact=False` datetime parsing, naive and with offset."""
    s = pl.Series(
        "a",
        [
            f"2022-01-16T00:00{offset}",
            f"2022-01-17T00:00{offset}",
            f"foo2022-01-18T00:00{offset}",
            f"b2022-01-19T00:00{offset}ar",
        ],
    )

    result = s.str.to_datetime(format, strict=False, exact=True)
    expected = pl.Series(
        "a",
        [
            datetime(2022, 1, 16, tzinfo=tzinfo),
            datetime(2022, 1, 17, tzinfo=tzinfo),
            None,
            None,
        ],
    )
    assert_series_equal(result, expected)
    assert result.dtype == pl.Datetime("us", time_zone)

    result = s.str.to_datetime(format, strict=False, exact=False)
    expected = pl.Series(
        "a",
        [
            datetime(2022, 1, 16, tzinfo=tzinfo),
            datetime(2022, 1, 17, tzinfo=tzinfo),
            datetime(2022, 1, 18, tzinfo=tzinfo),
            datetime(2022, 1, 19, tzinfo=tzinfo),
        ],
    )
    assert_series_equal(result, expected)
    assert result.dtype == pl.Datetime("us", time_zone)

    with pytest.raises(InvalidOperationError):
        s.str.to_datetime(format, strict=True, exact=True)


def test_to_datetime_dates_datetimes() -> None:
    """A mix of date-only and datetime strings parses without a format."""
    s = pl.Series("date", ["2021-04-22", "2022-01-04 00:00:00"])
    assert s.str.to_datetime().to_list() == [
        datetime(2021, 4, 22, 0, 0),
        datetime(2022, 1, 4, 0, 0),
    ]


@pytest.mark.parametrize(
    ("time_string", "expected"),
    [
        ("09-05-2019", datetime(2019, 5, 9)),
        ("2018-09-05", datetime(2018, 9, 5)),
        ("2018-09-05T04:05:01", datetime(2018, 9, 5, 4, 5, 1)),
        ("2018-09-05T04:24:01.9", datetime(2018, 9, 5, 4, 24, 1, 900000)),
        ("2018-09-05T04:24:02.11", datetime(2018, 9, 5, 4, 24, 2, 110000)),
        ("2018-09-05T14:24:02.123", datetime(2018, 9, 5, 14, 24, 2, 123000)),
        ("2019-04-18T02:45:55.555000000", datetime(2019, 4, 18, 2, 45, 55, 555000)),
        ("2019-04-18T22:45:55.555123", datetime(2019, 4, 18, 22, 45, 55, 555123)),
        (
            "2018-09-05T04:05:01+01:00",
            datetime(2018, 9, 5, 4, 5, 1, tzinfo=timezone(timedelta(hours=1))),
        ),
        (
            "2018-09-05T04:24:01.9+01:00",
            datetime(2018, 9, 5, 4, 24, 1, 900000, tzinfo=timezone(timedelta(hours=1))),
        ),
        (
            "2018-09-05T04:24:02.11+01:00",
            datetime(2018, 9, 5, 4, 24, 2, 110000, tzinfo=timezone(timedelta(hours=1))),
        ),
        (
            "2018-09-05T14:24:02.123+01:00",
            datetime(
                2018, 9, 5, 14, 24, 2, 123000, tzinfo=timezone(timedelta(hours=1))
            ),
        ),
        (
            "2019-04-18T02:45:55.555000000+01:00",
            datetime(
                2019, 4, 18, 2, 45, 55, 555000, tzinfo=timezone(timedelta(hours=1))
            ),
        ),
        (
            "2019-04-18T22:45:55.555123+01:00",
            datetime(
                2019, 4, 18, 22, 45, 55, 555123, tzinfo=timezone(timedelta(hours=1))
            ),
        ),
    ],
)
def test_to_datetime_patterns_single(time_string: str, expected: datetime) -> None:
    """Each single-element series parses to the expected datetime without a format."""
    result = pl.Series([time_string]).str.to_datetime().item()
    assert result == expected


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_infer_tz_aware_time_unit(time_unit: TimeUnit) -> None:
    """An offset-bearing string parses to a UTC datetime in the requested unit."""
    result = pl.Series(["2020-01-02T04:00:00+02:00"]).str.to_datetime(
        time_unit=time_unit
    )
    assert result.dtype == pl.Datetime(time_unit, "UTC")
    assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc)


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_infer_tz_aware_with_utc(time_unit: TimeUnit) -> None:
    """An offset-bearing string parses to a UTC datetime in the requested unit."""
    result = pl.Series(["2020-01-02T04:00:00+02:00"]).str.to_datetime(
        time_unit=time_unit
    )
    assert result.dtype == pl.Datetime(time_unit, "UTC")
    assert result.item() == datetime(2020, 1, 2, 2, 0, tzinfo=timezone.utc)


def test_str_to_datetime_infer_tz_aware() -> None:
    """An offset-bearing string is converted into the requested time zone."""
    result = (
        pl.Series(["2020-01-02T04:00:00+02:00"])
        .str.to_datetime(time_unit="us", time_zone="Europe/Vienna")
        .item()
    )
    assert result == datetime(2020, 1, 2, 3, tzinfo=ZoneInfo("Europe/Vienna"))


@pytest.mark.parametrize(
    "result",
    [
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(
            pl.Datetime("us", "UTC"), format="%Y-%m-%dT%H:%M:%S%z"
        ),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(
            pl.Datetime("us"), format="%Y-%m-%dT%H:%M:%S%z"
        ),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(pl.Datetime("us", "UTC")),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.strptime(pl.Datetime("us")),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(
            time_zone="UTC", format="%Y-%m-%dT%H:%M:%S%z"
        ),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(
            format="%Y-%m-%dT%H:%M:%S%z"
        ),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(time_zone="UTC"),
        pl.Series(["2020-01-01T00:00:00+00:00"]).str.to_datetime(),
    ],
)
def test_parsing_offset_aware_with_utc_dtype(result: pl.Series) -> None:
    """Every listed way of parsing an offset-aware string gives the same UTC series."""
    expected = pl.Series([datetime(2020, 1, 1, tzinfo=timezone.utc)])
    assert_series_equal(result, expected)


def test_datetime_strptime_patterns_consistent() -> None:
    """Only the `Z`-suffixed element fails when the rest share a year-first layout."""
    # note that all should be year first
    df = pl.Series(
        "date",
        [
            "2018-09-05",
            "2018-09-05T04:05:01",
            "2018-09-05T04:24:01.9",
            "2018-09-05T04:24:02.11",
            "2018-09-05T14:24:02.123",
            "2018-09-05T14:24:02.123Z",
            "2019-04-18T02:45:55.555000000",
            "2019-04-18T22:45:55.555123",
        ],
    ).to_frame()
    s = df.with_columns(
        pl.col("date").str.to_datetime(strict=False).alias("parsed"),
    )["parsed"]
    assert s.null_count() == 1
    assert s[5] is None


def test_datetime_strptime_patterns_inconsistent() -> None:
    """Only the first element parses when later elements use a different layout."""
    # note that the pattern is inferred from the first element to
    # be DatetimeDMY, and so the others (correctly) parse as `null`.
    df = pl.Series(
        "date",
        [
            "09-05-2019",
            "2018-09-05",
            "2018-09-05T04:05:01",
            "2018-09-05T04:24:01.9",
            "2018-09-05T04:24:02.11",
            "2018-09-05T14:24:02.123",
            "2018-09-05T14:24:02.123Z",
            "2019-04-18T02:45:55.555000000",
            "2019-04-18T22:45:55.555123",
        ],
    ).to_frame()
    s = df.with_columns(pl.col("date").str.to_datetime(strict=False).alias("parsed"))[
        "parsed"
    ]
    assert s.null_count() == 8
    assert s[0] is not None


@pytest.mark.parametrize(
    (
        "ts",
        "format",
        "exp_year",
        "exp_month",
        "exp_day",
        "exp_hour",
        "exp_minute",
        "exp_second",
    ),
    [
        ("-0031-04-24 22:13:20", "%Y-%m-%d %H:%M:%S", -31, 4, 24, 22, 13, 20),
        ("-0031-04-24", "%Y-%m-%d", -31, 4, 24, 0, 0, 0),
    ],
)
def test_parse_negative_dates(
    ts: str,
    format: str,
    exp_year: int,
    exp_month: int,
    exp_day: int,
    exp_hour: int,
    exp_minute: int,
    exp_second: int,
) -> None:
    """Negative years parse, checked component by component."""
    s = pl.Series([ts])
    result = s.str.to_datetime(format, time_unit="ms")
    # Python datetime.datetime doesn't support negative dates, so comparing
    # with `result.item()` directly won't work.
    assert result.dt.year().item() == exp_year
    assert result.dt.month().item() == exp_month
    assert result.dt.day().item() == exp_day
    assert result.dt.hour().item() == exp_hour
    assert result.dt.minute().item() == exp_minute
    assert result.dt.second().item() == exp_second


def test_short_formats() -> None:
    """A `%Y` format rejects over-long input; the `%bar` format yields only nulls."""
    s = pl.Series(["20202020", "2020"])
    assert s.str.to_date("%Y", strict=False).to_list() == [
        None,
        date(2020, 1, 1),
    ]
    assert s.str.to_date("%bar", strict=False).to_list() == [None, None]


@pytest.mark.parametrize(
    ("time_string", "fmt", "datatype", "expected"),
    [
        ("Jul/2020", "%b/%Y", pl.Date, date(2020, 7, 1)),
        ("Jan/2020", "%b/%Y", pl.Date, date(2020, 1, 1)),
        ("02/Apr/2020", "%d/%b/%Y", pl.Date, date(2020, 4, 2)),
        ("Dec/2020", "%b/%Y", pl.Datetime, datetime(2020, 12, 1, 0, 0)),
        ("Nov/2020", "%b/%Y", pl.Datetime, datetime(2020, 11, 1, 0, 0)),
        ("02/Feb/2020", "%d/%b/%Y", pl.Datetime, datetime(2020, 2, 2, 0, 0)),
    ],
)
def test_strptime_abbrev_month(
    time_string: str, fmt: str, datatype: PolarsTemporalType, expected: date
) -> None:
    """Abbreviated month names (`%b`) parse for both Date and Datetime."""
    s = pl.Series([time_string])
    result = s.str.strptime(datatype, fmt).item()
    assert result == expected


def test_full_month_name() -> None:
    """A full month name (`%B`) parses to the expected datetime."""
    s = pl.Series(["2022-December-01"]).str.to_datetime("%Y-%B-%d")
    assert s[0] == datetime(2022, 12, 1)


@pytest.mark.parametrize(
    ("datatype", "expected"),
    [
        (pl.Datetime, datetime(2022, 1, 1)),
        (pl.Date, date(2022, 1, 1)),
    ],
)
def test_single_digit_month(
    datatype: PolarsTemporalType, expected: datetime | date
) -> None:
    """Single-digit month and day parse with the `%m` and `%d` specifiers."""
    s = pl.Series(["2022-1-1"]).str.strptime(datatype, "%Y-%m-%d")
    assert s[0] == expected


def test_invalid_date_parsing_4898() -> None:
    """An out-of-range day becomes null under `strict=False`."""
    assert pl.Series(["2022-09-18", "2022-09-50"]).str.to_date(
        "%Y-%m-%d", strict=False
    ).to_list() == [date(2022, 9, 18), None]


def test_strptime_invalid_timezone() -> None:
    """Replacing the time zone with an unknown name raises `ComputeError`."""
    ts = pl.Series(["2020-01-01 00:00:00+01:00"]).str.to_datetime("%Y-%m-%d %H:%M:%S%z")
    with pytest.raises(ComputeError, match=r"unable to parse time zone: 'foo'"):
        ts.dt.replace_time_zone("foo")


def test_to_datetime_ambiguous_or_non_existent() -> None:
    """Ambiguous and non-existent local datetimes raise `ComputeError`.

    The last two cases show that `ambiguous="null"` (scalar or Series) does
    not suppress the non-existent error.
    """
    with pytest.raises(
        ComputeError,
        match="datetime '2021-11-07 01:00:00' is ambiguous in time zone 'US/Central'",
    ):
        pl.Series(["2021-11-07 01:00"]).str.to_datetime(
            time_unit="us", time_zone="US/Central"
        )
    with pytest.raises(
        ComputeError,
        match="datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Warsaw'",
    ):
        pl.Series(["2021-03-28 02:30"]).str.to_datetime(
            time_unit="us", time_zone="Europe/Warsaw"
        )
    with pytest.raises(
        ComputeError,
        match="datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Warsaw'",
    ):
        pl.Series(["2021-03-28 02:30"]).str.to_datetime(
            time_unit="us",
            time_zone="Europe/Warsaw",
            ambiguous="null",
        )
    with pytest.raises(
        ComputeError,
        match="datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Warsaw'",
    ):
        pl.Series(["2021-03-28 02:30"] * 2).str.to_datetime(
            time_unit="us",
            time_zone="Europe/Warsaw",
            ambiguous=pl.Series(["null", "null"]),
        )


@pytest.mark.parametrize(
    ("ts", "fmt", "expected"),
    [
        ("2020-01-01T00:00:00Z", None, datetime(2020, 1, 1, tzinfo=timezone.utc)),
        ("2020-01-01T00:00:00Z", "%+", datetime(2020, 1, 1, tzinfo=timezone.utc)),
        (
            "2020-01-01T00:00:00+01:00",
            "%Y-%m-%dT%H:%M:%S%z",
            datetime(2020, 1, 1, tzinfo=timezone(timedelta(seconds=3600))),
        ),
        (
            "2020-01-01T00:00:00+01:00",
            "%Y-%m-%dT%H:%M:%S%:z",
            datetime(2020, 1, 1, tzinfo=timezone(timedelta(seconds=3600))),
        ),
        (
            "2020-01-01T00:00:00+01:00",
            "%Y-%m-%dT%H:%M:%S%#z",
            datetime(2020, 1, 1, tzinfo=timezone(timedelta(seconds=3600))),
        ),
    ],
)
def test_to_datetime_tz_aware_strptime(
    ts: str, fmt: str | None, expected: datetime
) -> None:
    """Offset-bearing strings parse with `%+`, the `%z` variants, or no format."""
    result = pl.Series([ts]).str.to_datetime(fmt).item()
    assert result == expected


@pytest.mark.parametrize("format", ["%+", "%Y-%m-%dT%H:%M:%S%z"])
def test_crossing_dst(format: str) -> None:
    """Elements with different offsets each convert to the correct UTC instant."""
    ts = ["2021-03-27T23:59:59+01:00", "2021-03-28T23:59:59+02:00"]
    result = pl.Series(ts).str.to_datetime(format)
    assert result[0] == datetime(2021, 3, 27, 22, 59, 59, tzinfo=ZoneInfo("UTC"))
    assert result[1] == datetime(2021, 3, 28, 21, 59, 59, tzinfo=ZoneInfo("UTC"))


@pytest.mark.parametrize("format", ["%+", "%Y-%m-%dT%H:%M:%S%z"])
def test_crossing_dst_tz_aware(format: str) -> None:
    """Same inputs as `test_crossing_dst`, compared as a whole UTC series."""
    ts = ["2021-03-27T23:59:59+01:00", "2021-03-28T23:59:59+02:00"]
    result = pl.Series(ts).str.to_datetime(format)
    expected = pl.Series(
        [
            datetime(2021, 3, 27, 22, 59, 59, tzinfo=timezone.utc),
            datetime(2021, 3, 28, 21, 59, 59, tzinfo=timezone.utc),
        ]
    )
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    ("data", "format", "expected"),
    [
        (
            "2023-02-05T05:10:10.074000",
            "%Y-%m-%dT%H:%M:%S%.f",
            datetime(2023, 2, 5, 5, 10, 10, 74000),
        ),
    ],
)
def test_strptime_subseconds_datetime(
    data: str, format: str, expected: datetime
) -> None:
    """A `%.f` fractional part with a leading zero keeps its magnitude."""
    s = pl.Series([data])
    result = s.str.to_datetime(format).item()
    assert result == expected


@pytest.mark.parametrize(
    ("string", "fmt"),
    [
        pytest.param("2023-05-04|7", "%Y-%m-%d|%H", id="hour but no minute"),
        pytest.param("2023-05-04|7", "%Y-%m-%d|%k", id="padded hour but no minute"),
        pytest.param("2023-05-04|10", "%Y-%m-%d|%M", id="minute but no hour"),
        pytest.param("2023-05-04|10", "%Y-%m-%d|%S", id="second but no hour"),
        pytest.param(
            "2000-Jan-01 01 00 01", "%Y-%b-%d %I %M %S", id="12-hour clock but no AM/PM"
        ),
        pytest.param(
            "2000-Jan-01 01 00 01",
            "%Y-%b-%d %l %M %S",
            id="padded 12-hour clock but no AM/PM",
        ),
    ],
)
def test_strptime_incomplete_formats(string: str, fmt: str) -> None:
    """Formats that specify only part of the time of day are rejected."""
    with pytest.raises(
        ComputeError,
        match="Invalid format string",
    ):
        pl.Series([string]).str.to_datetime(fmt)


@pytest.mark.parametrize(
    ("string", "fmt", "expected"),
    [
        ("2023-05-04|7:3", "%Y-%m-%d|%H:%M", datetime(2023, 5, 4, 7, 3)),
        ("2023-05-04|10:03", "%Y-%m-%d|%H:%M", datetime(2023, 5, 4, 10, 3)),
        (
            "2000-Jan-01 01 00 01 am",
            "%Y-%b-%d %I %M %S %P",
            datetime(2000, 1, 1, 1, 0, 1),
        ),
        (
            "2000-Jan-01 01 00 01 am",
            "%Y-%b-%d %_I %M %S %P",
            datetime(2000, 1, 1, 1, 0, 1),
        ),
        (
            "2000-Jan-01 01 00 01 am",
            "%Y-%b-%d %l %M %S %P",
            datetime(2000, 1, 1, 1, 0, 1),
        ),
        (
            "2000-Jan-01 01 00 01 AM",
            "%Y-%b-%d %I %M %S %p",
            datetime(2000, 1, 1, 1, 0, 1),
        ),
        (
            "2000-Jan-01 01 00 01 AM",
            "%Y-%b-%d %_I %M %S %p",
            datetime(2000, 1, 1, 1, 0, 1),
        ),
        (
            "2000-Jan-01 01 00 01 AM",
            "%Y-%b-%d %l %M %S %p",
            datetime(2000, 1, 1, 1, 0, 1),
        ),
    ],
)
def test_strptime_complete_formats(string: str, fmt: str, expected: datetime) -> None:
    """Formats with hour and minute, or a 12-hour clock plus AM/PM, parse."""
    # Similar to the above, but these formats are complete and should work
    result = pl.Series([string]).str.to_datetime(fmt).item()
    assert result == expected


@pytest.mark.parametrize(
    ("data", "format", "expected"),
    [
        ("00:00:00.000005000", "%H:%M:%S%.f", time(0, 0, 0, 5)),
        ("01:23:10.000500", "%H:%M:%S%.6f", time(1, 23, 10, 500)),
        ("08:10:11.000", "%H:%M:%S%.3f", time(8, 10, 11)),
        ("15:50:25", "%T", time(15, 50, 25)),
        ("22:35", "%R", time(22, 35)),
    ],
)
def test_to_time_inferred(data: str, format: str, expected: time) -> None:
    """`to_time` gives the same result with the explicit format and with none."""
    df = pl.DataFrame({"tmstr": [data]})
    expected_df = df.with_columns(tm=pl.Series("tm", values=[expected]))
    for fmt in (format, None):
        res = df.with_columns(tm=pl.col("tmstr").str.to_time(fmt))
        assert_frame_equal(res, expected_df)


@pytest.mark.parametrize(
    ("data", "format", "expected"),
    [
        ("05:10:11.740000", "%H:%M:%S%.f", time(5, 10, 11, 740000)),
        ("13:20:12.000074", "%T%.6f", time(13, 20, 12, 74)),
        ("21:30:13.007400", "%H:%M:%S%.3f", time(21, 30, 13, 7400)),
    ],
)
def test_to_time_subseconds(data: str, format: str, expected: time) -> None:
    """Sub-second times parse identically with and without the explicit format."""
    s = pl.Series([data])
    for res in (
        s.str.to_time().item(),
        s.str.to_time(format).item(),
    ):
        assert res == expected


def test_to_time_format_warning() -> None:
    """A `.%f` format emits `ChronoFormatWarning` and still returns a value."""
    s = pl.Series(["05:10:10.074000"])
    with pytest.warns(ChronoFormatWarning, match=".%f"):
        result = s.str.to_time("%H:%M:%S.%f").item()
    assert result == time(5, 10, 10, 74)


@pytest.mark.parametrize("exact", [True, False])
def test_to_datetime_ambiguous_earliest(exact: bool) -> None:
    """`ambiguous` selects the fold for `to_datetime`; omitting it raises."""
    result = (
        pl.Series(["2020-10-25 01:00"])
        .str.to_datetime(time_zone="Europe/London", ambiguous="earliest", exact=exact)
        .item()
    )
    expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London"))
    assert result == expected
    result = (
        pl.Series(["2020-10-25 01:00"])
        .str.to_datetime(time_zone="Europe/London", ambiguous="latest", exact=exact)
        .item()
    )
    expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London"))
    assert result == expected
    with pytest.raises(ComputeError):
        pl.Series(["2020-10-25 01:00"]).str.to_datetime(
            time_zone="Europe/London",
            exact=exact,
        ).item()


def test_to_datetime_naive_format_and_time_zone() -> None:
    """A naive string plus `time_zone` equals parsing naive then replacing the zone."""
    # format-specified path
    result = pl.Series(["2020-01-01"]).str.to_datetime(
        format="%Y-%m-%d", time_zone="Asia/Kathmandu"
    )
    expected = pl.Series([datetime(2020, 1, 1)]).dt.replace_time_zone("Asia/Kathmandu")
    assert_series_equal(result, expected)
    # format-inferred path
    result = pl.Series(["2020-01-01"]).str.to_datetime(time_zone="Asia/Kathmandu")
    assert_series_equal(result, expected)


@pytest.mark.parametrize("exact", [True, False])
def test_strptime_ambiguous_earliest(exact: bool) -> None:
    """`ambiguous` selects the fold for `strptime`; omitting it raises."""
    result = (
        pl.Series(["2020-10-25 01:00"])
        .str.strptime(
            pl.Datetime("us", "Europe/London"), ambiguous="earliest", exact=exact
        )
        .item()
    )
    expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London"))
    assert result == expected
    result = (
        pl.Series(["2020-10-25 01:00"])
        .str.strptime(
            pl.Datetime("us", "Europe/London"), ambiguous="latest", exact=exact
        )
        .item()
    )
    expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London"))
    assert result == expected
    with pytest.raises(ComputeError):
        pl.Series(["2020-10-25 01:00"]).str.strptime(
            pl.Datetime("us", "Europe/London"),
            exact=exact,
        ).item()


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_to_datetime_out_of_range_13401(time_unit: TimeUnit) -> None:
    """A seconds value of 66 raises when strict and becomes null otherwise."""
    s = pl.Series(["2020-January-01 12:34:66"])
    with pytest.raises(InvalidOperationError, match="conversion .* failed"):
        s.str.to_datetime("%Y-%B-%d %H:%M:%S", time_unit=time_unit)
    assert (
        s.str.to_datetime("%Y-%B-%d %H:%M:%S", strict=False, time_unit=time_unit).item()
        is None
    )


def test_out_of_ns_range_no_tu_specified_13592() -> None:
    """Without a time unit, a year-920 value parses into a `us` datetime."""
    df = pl.DataFrame({"dates": ["2022-08-31 00:00:00.0", "0920-09-18 00:00:00.0"]})
    result = df.select(pl.col("dates").str.to_datetime(format="%Y-%m-%d %H:%M:%S%.f"))[
        "dates"
    ]
    expected = pl.Series(
        "dates",
        [datetime(2022, 8, 31, 0, 0), datetime(920, 9, 18, 0, 0)],
        dtype=pl.Datetime("us"),
    )
    assert_series_equal(result, expected)


def test_wrong_format_percent() -> None:
    """The format `d%` raises `InvalidOperationError`."""
    with pytest.raises(InvalidOperationError):
        pl.Series(["2019-01-01"]).str.strptime(pl.Date, format="d%")


def test_polars_parser_fooled_by_trailing_nonsense_22167() -> None:
    """Formats whose tail does not match the data raise `InvalidOperationError`."""
    with pytest.raises(InvalidOperationError):
        pl.Series(["2025-04-06T18:57:42.77756192Z"]).str.to_datetime(
            "%Y-%m-%dT%H:%M:%S.%9fcabbagebananapotato"
        )
    with pytest.raises(InvalidOperationError):
        pl.Series(["2025-04-06T18:57:42.77756192Z"]).str.to_datetime(
            "%Y-%m-%dT%H:%M:%S.%9f#z"
        )
    with pytest.raises(InvalidOperationError):
        pl.Series(["2025-04-06T18:57:42.77Z"]).str.to_datetime(
            "%Y-%m-%dT%H:%M:%S.%3f#z"
        )
    with pytest.raises(InvalidOperationError):
        pl.Series(["2025-04-06T18:57:42.77123Z"]).str.to_datetime(
            "%Y-%m-%dT%H:%M:%S.%6f#z"
        )


def test_strptime_empty_input_22214() -> None:
    """`strptime` on an empty String series returns an empty result for each dtype."""
    s = pl.Series("x", [], pl.String)

    assert s.str.strptime(pl.Time, "%H:%M:%S%.f").is_empty()
    assert s.str.strptime(pl.Date, "%Y-%m-%d").is_empty()
    assert s.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M%#z").is_empty()


@pytest.mark.parametrize(
    "value",
    [
        "31/12/2022",
        "banana",
        "12-345-678",
        "12-345-67",
        "12-345-6789",
        "123*45*678",
        "123x45x678",
        "123x45x678x",
    ],
)
def test_matching_strings_but_different_format_22495(value: str) -> None:
    """Strings that do not match `%Y-%m-%d` become null under `strict=False`."""
    s = pl.Series("my_strings", [value])
    result = s.str.to_date("%Y-%m-%d", strict=False).item()
    assert result is None


def test_date_parse_omit_day_month() -> None:
    """Year-plus-month-name strings round-trip and parse to the first of the month."""
    fmt_B = "%Y %B"
    fmt_b = "%Y %b"
    df = (
        pl.select(date=pl.date_range(pl.date(2022, 1, 1), pl.date(2022, 12, 1), "1mo"))
        .with_columns(
            strdateB=pl.col("date").dt.strftime(fmt_B),
            strdateb=pl.col("date").dt.strftime(fmt_b),
        )
        .with_columns(
            round_tripB=pl.col("strdateB").str.strptime(pl.Date, fmt_B),
            round_tripb=pl.col("strdateb").str.strptime(pl.Date, fmt_b),
        )
    )
    # Keep only rows where at least one round trip differs from the source date.
    check = df.filter(
        ~pl.all_horizontal(
            pl.col("date") == pl.col("round_tripB"),
            pl.col("date") == pl.col("round_tripb"),
        )
    )
    assert check.height == 0

    s = pl.Series(
        [
            "2022 January",
            "2022 February",
            "2022 March",
            "2022 April",
            "2022 May",
            "2022 June",
            "2022 July",
            "2022 August",
            "2022 September",
            "2022 October",
            "2022 November",
            "2022 December",
        ]
    )
    result = s.str.strptime(pl.Date, "%Y %B")
    expected = pl.Series(
        [
            date(2022, 1, 1),
            date(2022, 2, 1),
            date(2022, 3, 1),
            date(2022, 4, 1),
            date(2022, 5, 1),
            date(2022, 6, 1),
            date(2022, 7, 1),
            date(2022, 8, 1),
            date(2022, 9, 1),
            date(2022, 10, 1),
            date(2022, 11, 1),
            date(2022, 12, 1),
        ]
    )
    assert_series_equal(result, expected)


@pytest.mark.parametrize("length", [1, 5])
def test_eager_inference_on_expr(length: int) -> None:
    """Series-level `strptime` infers a tz-aware result; the expression form raises."""
    s = pl.Series("a", ["2025-04-06T18:57:42.77123Z"] * length)

    assert_series_equal(
        s.str.strptime(pl.Datetime),
        pl.Series(
            "a",
            [
                datetime(
                    2025, 4, 6, 18, 57, 42, 771230, tzinfo=timezone(timedelta(hours=0))
                )
            ]
            * length,
        ),
    )

    with pytest.raises(
        ComputeError,
        match="`strptime` / `to_datetime` was called with no format and no time zone, but a time zone is part of the data",
    ):
        s.to_frame().select(pl.col("a").str.strptime(pl.Datetime))