Path: blob/main/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
8406 views
from __future__ import annotations12import operator3from collections import OrderedDict4from datetime import date, datetime, timedelta5from typing import TYPE_CHECKING, Any67import numpy as np8import pytest910import polars as pl11from polars import (12Date,13Float64,14Int8,15Int16,16Int32,17Int64,18UInt8,19UInt16,20UInt32,21UInt64,22)23from polars.exceptions import ColumnNotFoundError, InvalidOperationError24from polars.testing import assert_frame_equal, assert_series_equal25from tests.unit.conftest import INTEGER_DTYPES, NUMERIC_DTYPES, UNSIGNED_INTEGER_DTYPES2627if TYPE_CHECKING:28from collections.abc import Callable2930from polars._typing import PolarsIntegerType313233def test_sqrt_neg_inf() -> None:34out = pl.DataFrame(35{36"val": [float("-Inf"), -9, 0, 9, float("Inf")],37}38).with_columns(pl.col("val").sqrt().alias("sqrt"))39# comparing nans and infinities by string value as they are not cmp40assert str(out["sqrt"].to_list()) == str(41[float("nan"), float("nan"), 0.0, 3.0, float("Inf")]42)434445def test_arithmetic_with_logical_on_series_4920() -> None:46assert (pl.Series([date(2022, 6, 3)]) - date(2022, 1, 1)).dtype == pl.Duration("us")474849@pytest.mark.parametrize(50("left", "right", "expected_value", "expected_dtype"),51[52(date(2021, 1, 1), date(2020, 1, 1), timedelta(days=366), pl.Duration("us")),53(54datetime(2021, 1, 1),55datetime(2020, 1, 1),56timedelta(days=366),57pl.Duration("us"),58),59(timedelta(days=1), timedelta(days=2), timedelta(days=-1), pl.Duration("us")),60(2.0, 3.0, -1.0, pl.Float64),61],62)63def test_arithmetic_sub(64left: object, right: object, expected_value: object, expected_dtype: pl.DataType65) -> None:66result = left - pl.Series([right])67expected = pl.Series("", [expected_value], dtype=expected_dtype)68assert_series_equal(result, expected)69result = pl.Series([left]) - right70assert_series_equal(result, expected)717273def test_struct_arithmetic() -> None:74df = pl.DataFrame(75{76"a": [1, 2],77"b": [3, 4],78"c": [5, 6],79}80).select(pl.cum_sum_horizontal("a", "c"))8182q = df.lazy().select(pl.col("cum_sum") * 2)83out = q.collect()84assert out.to_dict(as_series=False) == {85"cum_sum": [{"a": 2, "c": 12}, {"a": 4, "c": 16}]86}87assert q.collect_schema() == out.schema8889q = df.lazy().select(pl.col("cum_sum") - 2)90out = q.collect()91assert out.to_dict(as_series=False) == {92"cum_sum": [{"a": -1, "c": 4}, {"a": 0, "c": 6}]93}94assert q.collect_schema() == out.schema9596q = df.lazy().select(pl.col("cum_sum") + 2)97out = q.collect()98assert out.to_dict(as_series=False) == {99"cum_sum": [{"a": 3, "c": 8}, {"a": 4, "c": 10}]100}101assert q.collect_schema() == out.schema102103q = df.lazy().select(pl.col("cum_sum") / 2)104out = q.collect()105assert out.to_dict(as_series=False) == {106"cum_sum": [{"a": 0.5, "c": 3.0}, {"a": 1.0, "c": 4.0}]107}108assert q.collect_schema() == out.schema109110q = df.lazy().select(pl.col("cum_sum") // 2)111out = q.collect()112assert out.to_dict(as_series=False) == {113"cum_sum": [{"a": 0, "c": 3}, {"a": 1, "c": 4}]114}115assert q.collect_schema() == out.schema116117# inline, this checks cum_sum reports the right output type118assert pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).select(119pl.cum_sum_horizontal("a", "c") * 3120).to_dict(as_series=False) == {"cum_sum": [{"a": 3, "c": 18}, {"a": 6, "c": 24}]}121122123def test_simd_float_sum_determinism() -> None:124out = []125for _ in range(10):126a = pl.Series(127[1280.021415853782953836,1290.06234123511682772,1300.016962384922753124,1310.002595968402539279,1320.007632765529696731,1330.012105848332077212,1340.021439787151032317,1350.3223049133700719,1360.10526670729539435,1370.0859029285522487,138]139)140out.append(a.sum())141142assert out == [1430.6579683924555951,1440.6579683924555951,1450.6579683924555951,1460.6579683924555951,1470.6579683924555951,1480.6579683924555951,1490.6579683924555951,1500.6579683924555951,1510.6579683924555951,1520.6579683924555951,153]154155156def test_floor_division_float_int_consistency() -> None:157a = np.random.randn(10) * 10158159assert (pl.Series(a) // 5).to_list() == list(a // 5)160assert (pl.Series(a, dtype=pl.Int32) // 5).to_list() == list(161(a.astype(int) // 5).astype(int)162)163164165def test_series_expr_arithm() -> None:166s = pl.Series([1, 2, 3])167assert (s + pl.col("a")).meta == pl.lit(s) + pl.col("a")168assert (s - pl.col("a")).meta == pl.lit(s) - pl.col("a")169assert (s / pl.col("a")).meta == pl.lit(s) / pl.col("a")170assert (s // pl.col("a")).meta == pl.lit(s) // pl.col("a")171assert (s * pl.col("a")).meta == pl.lit(s) * pl.col("a")172assert (s % pl.col("a")).meta == pl.lit(s) % pl.col("a")173174175def test_fused_arithm() -> None:176df = pl.DataFrame(177{178"a": [1, 2, 3],179"b": [10, 20, 30],180"c": [5, 5, 5],181}182)183184q = df.lazy().select(185pl.col("a") * pl.col("b") + pl.col("c"),186(pl.col("a") + pl.col("b") * pl.col("c")).alias("2"),187)188# the extra aliases are because the fma does operation reordering189assert (190"""col("c").fma([col("a"), col("b")]).alias("a"), col("a").fma([col("b"), col("c")]).alias("2")"""191in q.explain()192)193assert q.collect().to_dict(as_series=False) == {194"a": [15, 45, 95],195"2": [51, 102, 153],196}197# fsm198q = df.lazy().select(pl.col("a") - pl.col("b") * pl.col("c"))199assert """col("a").fsm([col("b"), col("c")])""" in q.explain()200assert q.collect()["a"].to_list() == [-49, -98, -147]201# fms202q = df.lazy().select(pl.col("a") * pl.col("b") - pl.col("c"))203assert """col("a").fms([col("b"), col("c")])""" in q.explain()204assert q.collect()["a"].to_list() == [5, 35, 85]205206# check if we constant fold instead of fma207q = df.lazy().select(pl.lit(1) * pl.lit(2) - pl.col("c"))208assert """(2) - (col("c")""" in q.explain()209210# Check if fused is turned off for literals see: #9857211for expr in [212pl.col("c") * 2 + 5,213pl.col("c") * 2 + pl.col("c"),214pl.col("c") * 2 - 5,215pl.col("c") * 2 - pl.col("c"),2165 - pl.col("c") * 2,217pl.col("c") - pl.col("c") * 2,218]:219q = df.lazy().select(expr)220assert all(el not in q.explain() for el in ["fms", "fsm", "fma"]), (221f"Fused Arithmetic applied on literal {expr}: {q.explain()}"222)223224225def test_literal_no_upcast() -> None:226df = pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Float32)})227228q = (229df.lazy()230.select(231(pl.col("a") * -5 + 2).alias("fma"),232(2 - pl.col("a") * 5).alias("fsm"),233(pl.col("a") * 5 - 2).alias("fms"),234)235.collect()236)237assert set(q.schema.values()) == {pl.Float32}, (238"Literal * Column (Float32) should not lead upcast"239)240241242def test_boolean_addition() -> None:243s = pl.DataFrame(244{"a": [True, False, False], "b": [True, False, True]}245).sum_horizontal()246247assert s.dtype == pl.get_index_type()248assert s.to_list() == [2, 0, 1]249df = pl.DataFrame(250{"a": [True], "b": [False]},251).select(pl.sum_horizontal("a", "b"))252assert df.dtypes == [pl.get_index_type()]253254255def test_bitwise_6311() -> None:256df = pl.DataFrame({"col1": [0, 1, 2, 3], "flag": [0, 0, 0, 0]})257258assert (259df.with_columns(260pl.when((pl.col("col1") < 1) | (pl.col("col1") >= 3))261.then(pl.col("flag") | 2) # set flag b0010262.otherwise(pl.col("flag"))263).with_columns(264pl.when(pl.col("col1") > -1)265.then(pl.col("flag") | 4)266.otherwise(pl.col("flag"))267)268).to_dict(as_series=False) == {"col1": [0, 1, 2, 3], "flag": [6, 4, 4, 6]}269270271def test_arithmetic_null_count() -> None:272df = pl.DataFrame({"a": [1, None, 2], "b": [None, 2, 1]})273out = df.select(274no_broadcast=pl.col("a") + pl.col("b"),275broadcast_left=1 + pl.col("b"),276broadcast_right=pl.col("a") + 1,277)278assert out.null_count().to_dict(as_series=False) == {279"no_broadcast": [2],280"broadcast_left": [1],281"broadcast_right": [1],282}283284285@pytest.mark.parametrize(286"op",287[288operator.add,289operator.floordiv,290operator.mod,291operator.mul,292operator.sub,293],294)295@pytest.mark.parametrize("dtype", NUMERIC_DTYPES)296def test_operator_arithmetic_with_nulls(op: Any, dtype: pl.DataType) -> None:297df = pl.DataFrame({"n": [2, 3]}, schema={"n": dtype})298s = df.to_series()299300df_expected = pl.DataFrame({"n": [None, None]}, schema={"n": dtype})301s_expected = df_expected.to_series()302303# validate expr, frame, and series behaviour with null value arithmetic304op_name = op.__name__305for null_expr in (None, pl.lit(None)):306assert_frame_equal(df_expected, df.select(op(pl.col("n"), null_expr)))307assert_frame_equal(308df_expected, df.select(getattr(pl.col("n"), op_name)(null_expr))309)310311assert_frame_equal(op(df, None), df_expected)312assert_series_equal(op(s, None), s_expected)313314315@pytest.mark.parametrize(316"op",317[318operator.add,319operator.mod,320operator.mul,321operator.sub,322],323)324def test_null_column_arithmetic(op: Any) -> None:325df = pl.DataFrame({"a": [None, None], "b": [None, None]})326expected_df = pl.DataFrame({"a": [None, None]})327328output_df = df.select(op(pl.col("a"), pl.col("b")))329assert_frame_equal(expected_df, output_df)330# test broadcast right331output_df = df.select(op(pl.col("a"), pl.Series([None])))332assert_frame_equal(expected_df, output_df)333# test broadcast left334output_df = df.select(op(pl.Series("a", [None]), pl.col("a")))335assert_frame_equal(expected_df, output_df)336337338def test_bool_floordiv() -> None:339df = pl.DataFrame({"x": [True]})340341with pytest.raises(342InvalidOperationError,343match="floor_div operation not supported for dtype `bool`",344):345df.with_columns(pl.col("x").floordiv(2))346347348def test_arithmetic_in_aggregation_3739() -> None:349def demean_dot() -> pl.Expr:350x = pl.col("x")351y = pl.col("y")352x1 = x - x.mean()353y1 = y - y.mean()354return (x1 * y1).sum().alias("demean_dot")355356assert (357pl.DataFrame(358{359"key": ["a", "a", "a", "a"],360"x": [4, 2, 2, 4],361"y": [2, 0, 2, 0],362}363)364.group_by("key")365.agg(366[367demean_dot(),368]369)370).to_dict(as_series=False) == {"key": ["a"], "demean_dot": [0.0]}371372373def test_arithmetic_on_df() -> None:374df = pl.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})375376for df_mul in (df * 2, 2 * df):377expected = pl.DataFrame({"a": [2.0, 4.0], "b": [6.0, 8.0]})378assert_frame_equal(df_mul, expected)379380for df_plus in (df + 2, 2 + df):381expected = pl.DataFrame({"a": [3.0, 4.0], "b": [5.0, 6.0]})382assert_frame_equal(df_plus, expected)383384df_div = df / 2385expected = pl.DataFrame({"a": [0.5, 1.0], "b": [1.5, 2.0]})386assert_frame_equal(df_div, expected)387388df_minus = df - 2389expected = pl.DataFrame({"a": [-1.0, 0.0], "b": [1.0, 2.0]})390assert_frame_equal(df_minus, expected)391392df_mod = df % 2393expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]})394assert_frame_equal(df_mod, expected)395396df2 = pl.DataFrame({"c": [10]})397398out = df + df2399expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_columns(400pl.col("b").cast(pl.Float64)401)402assert_frame_equal(out, expected)403404out = df - df2405expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_columns(406pl.col("b").cast(pl.Float64)407)408assert_frame_equal(out, expected)409410out = df / df2411expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_columns(412pl.col("b").cast(pl.Float64)413)414assert_frame_equal(out, expected)415416out = df * df2417expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_columns(418pl.col("b").cast(pl.Float64)419)420assert_frame_equal(out, expected)421422out = df % df2423expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_columns(424pl.col("b").cast(pl.Float64)425)426assert_frame_equal(out, expected)427428# cannot do arithmetic with a sequence429with pytest.raises(TypeError, match="operation not supported"):430_ = df + [1] # type: ignore[operator]431432433def test_df_series_division() -> None:434df = pl.DataFrame(435{436"a": [2, 2, 4, 4, 6, 6],437"b": [2, 2, 10, 5, 6, 6],438}439)440s = pl.Series([2, 2, 2, 2, 2, 2])441assert (df / s).to_dict(as_series=False) == {442"a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],443"b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0],444}445assert (df // s).to_dict(as_series=False) == {446"a": [1, 1, 2, 2, 3, 3],447"b": [1, 1, 5, 2, 3, 3],448}449450451@pytest.mark.parametrize(452"s", [pl.Series([1, 2], dtype=Int64), pl.Series([1, 2], dtype=Float64)]453)454def test_arithmetic_series(s: pl.Series) -> None:455a = s456b = s457458assert ((a * b) == [1, 4]).sum() == 2459assert ((a / b) == [1.0, 1.0]).sum() == 2460assert ((a + b) == [2, 4]).sum() == 2461assert ((a - b) == [0, 0]).sum() == 2462assert ((a + 1) == [2, 3]).sum() == 2463assert ((a - 1) == [0, 1]).sum() == 2464assert ((a / 1) == [1.0, 2.0]).sum() == 2465assert ((a // 2) == [0, 1]).sum() == 2466assert ((a * 2) == [2, 4]).sum() == 2467assert ((2 + a) == [3, 4]).sum() == 2468assert ((1 - a) == [0, -1]).sum() == 2469assert ((2 * a) == [2, 4]).sum() == 2470471# integer division472assert_series_equal(1 / a, pl.Series([1.0, 0.5]))473expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5])474assert_series_equal(1 // a, expected)475# modulo476assert ((1 % a) == [0, 1]).sum() == 2477assert ((a % 1) == [0, 0]).sum() == 2478# negate479assert (-a == [-1, -2]).sum() == 2480# unary plus481assert (+a == a).all()482# wrong dtypes in rhs operands483assert ((1.0 - a) == [0.0, -1.0]).sum() == 2484assert ((1.0 / a) == [1.0, 0.5]).sum() == 2485assert ((1.0 * a) == [1, 2]).sum() == 2486assert ((1.0 + a) == [2, 3]).sum() == 2487assert ((1.0 % a) == [0, 1]).sum() == 2488489490def test_arithmetic_datetime() -> None:491a = pl.Series("a", [datetime(2021, 1, 1)])492with pytest.raises(TypeError):493a // 2494with pytest.raises(TypeError):495a / 2496with pytest.raises(TypeError):497a * 2498with pytest.raises(TypeError):499a % 2500with pytest.raises(501InvalidOperationError,502):503a**2504with pytest.raises(TypeError):5052 / a506with pytest.raises(TypeError):5072 // a508with pytest.raises(TypeError):5092 * a510with pytest.raises(TypeError):5112 % a512with pytest.raises(513InvalidOperationError,514):5152**a516517518def test_power_series() -> None:519a = pl.Series([1, 2], dtype=Int64)520b = pl.Series([None, 2.0], dtype=Float64)521c = pl.Series([date(2020, 2, 28), date(2020, 3, 1)], dtype=Date)522d = pl.Series([1, 2], dtype=UInt8)523e = pl.Series([1, 2], dtype=Int8)524f = pl.Series([1, 2], dtype=UInt16)525g = pl.Series([1, 2], dtype=Int16)526h = pl.Series([1, 2], dtype=UInt32)527i = pl.Series([1, 2], dtype=Int32)528j = pl.Series([1, 2], dtype=UInt64)529k = pl.Series([1, 2], dtype=Int64)530m = pl.Series([2**33, 2**33], dtype=UInt64)531532# pow533assert_series_equal(a**2, pl.Series([1, 4], dtype=Int64))534assert_series_equal(b**3, pl.Series([None, 8.0], dtype=Float64))535assert_series_equal(a**a, pl.Series([1, 4], dtype=Int64))536assert_series_equal(b**b, pl.Series([None, 4.0], dtype=Float64))537assert_series_equal(a**b, pl.Series([None, 4.0], dtype=Float64))538assert_series_equal(d**d, pl.Series([1, 4], dtype=UInt8))539assert_series_equal(e**d, pl.Series([1, 4], dtype=Int8))540assert_series_equal(f**d, pl.Series([1, 4], dtype=UInt16))541assert_series_equal(g**d, pl.Series([1, 4], dtype=Int16))542assert_series_equal(h**d, pl.Series([1, 4], dtype=UInt32))543assert_series_equal(i**d, pl.Series([1, 4], dtype=Int32))544assert_series_equal(j**d, pl.Series([1, 4], dtype=UInt64))545assert_series_equal(k**d, pl.Series([1, 4], dtype=Int64))546547with pytest.raises(548InvalidOperationError,549match="`pow` operation not supported for dtype `null` as exponent",550):551a ** pl.lit(None)552553with pytest.raises(554InvalidOperationError,555match="`pow` operation not supported for dtype `date` as base",556):557c**2558with pytest.raises(559InvalidOperationError,560match="`pow` operation not supported for dtype `date` as exponent",561):5622**c563564with pytest.raises(ColumnNotFoundError):565a ** "hi" # type: ignore[operator]566567# Raising to UInt64: raises if can't be downcast safely to UInt32...568with pytest.raises(569InvalidOperationError, match="conversion from `u64` to `u32` failed"570):571a**m572# ... but succeeds otherwise.573assert_series_equal(a**j, pl.Series([1, 4], dtype=Int64))574575# rpow576assert_series_equal(2.0**a, pl.Series(None, [2.0, 4.0], dtype=Float64))577assert_series_equal(2**b, pl.Series(None, [None, 4.0], dtype=Float64))578579with pytest.raises(ColumnNotFoundError):580"hi" ** a581582# Series.pow() method583assert_series_equal(a.pow(2), pl.Series([1, 4], dtype=Int64))584585586def test_rpow_name_20071() -> None:587result = 1 ** pl.Series("a", [1, 2])588expected = pl.Series("a", [1, 1], pl.Int32)589assert_series_equal(result, expected)590591592@pytest.mark.parametrize(593("expected", "expr", "column_names"),594[595(np.array([[2, 4], [6, 8]], dtype=np.int64), lambda a, b: a + b, ("a", "a")),596(np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a - b, ("a", "a")),597(np.array([[1, 4], [9, 16]], dtype=np.int64), lambda a, b: a * b, ("a", "a")),598(599np.array([[1.0, 1.0], [1.0, 1.0]], dtype=np.float64),600lambda a, b: a / b,601("a", "a"),602),603(np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a % b, ("a", "a")),604(605np.array([[3, 4], [7, 8]], dtype=np.int64),606lambda a, b: a + b,607("a", "uint8"),608),609# This fails because the code is buggy, see610# https://github.com/pola-rs/polars/issues/17820611#612# (613# np.array([[[2, 4]], [[6, 8]]], dtype=np.int64),614# lambda a, b: a + b,615# ("nested", "nested"),616# ),617],618)619def test_array_arithmetic_same_size(620expected: Any,621expr: Callable[[pl.Series | pl.Expr, pl.Series | pl.Expr], pl.Series],622column_names: tuple[str, str],623) -> None:624df = pl.DataFrame(625[626pl.Series("a", np.array([[1, 2], [3, 4]], dtype=np.int64)),627pl.Series("uint8", np.array([[2, 2], [4, 4]], dtype=np.uint8)),628pl.Series("nested", np.array([[[1, 2]], [[3, 4]]], dtype=np.int64)),629]630)631# Expr-based arithmetic:632assert_frame_equal(633df.select(expr(pl.col(column_names[0]), pl.col(column_names[1]))),634pl.Series(column_names[0], expected).to_frame(),635)636# Direct arithmetic on the Series:637assert_series_equal(638expr(df[column_names[0]], df[column_names[1]]),639pl.Series(column_names[0], expected),640)641642643def test_schema_owned_arithmetic_5669() -> None:644df = (645pl.LazyFrame({"A": [1, 2, 3]})646.filter(pl.col("A") >= 3)647.with_columns(-pl.col("A").alias("B"))648.collect()649)650assert df.columns == ["A", "B"]651assert df.rows() == [(3, -3)]652653654def test_schema_true_divide_6643() -> None:655df = pl.DataFrame({"a": [1]})656a = pl.col("a")657assert df.lazy().select(a / 2).select(pl.col(pl.Int64)).collect().shape == (0, 0)658659660def test_literal_subtract_schema_13284() -> None:661assert (662pl.LazyFrame({"a": [23, 30]}, schema={"a": pl.UInt8})663.with_columns(pl.col("a") - pl.lit(1))664.group_by("a")665.len()666).collect_schema() == OrderedDict([("a", pl.UInt8), ("len", pl.get_index_type())])667668669@pytest.mark.parametrize("dtype", INTEGER_DTYPES)670def test_int_operator_stability(dtype: pl.DataType) -> None:671s = pl.Series(values=[10], dtype=dtype)672assert pl.select(pl.lit(s) // 2).dtypes == [dtype]673assert pl.select(pl.lit(s) + 2).dtypes == [dtype]674assert pl.select(pl.lit(s) - 2).dtypes == [dtype]675assert pl.select(pl.lit(s) * 2).dtypes == [dtype]676assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]677678679def test_duration_division_schema() -> None:680df = pl.DataFrame({"a": [1]})681q = (682df.lazy()683.with_columns(pl.col("a").cast(pl.Duration))684.select(pl.col("a") / pl.col("a"))685)686687assert q.collect_schema() == {"a": pl.Float64}688assert q.collect().to_dict(as_series=False) == {"a": [1.0]}689690691@pytest.mark.parametrize(692("a", "b", "op"),693[694(pl.Duration, pl.Int32, "+"),695(pl.Int32, pl.Duration, "+"),696(pl.Time, pl.Int32, "+"),697(pl.Int32, pl.Time, "+"),698(pl.Date, pl.Int32, "+"),699(pl.Int32, pl.Date, "+"),700(pl.Datetime, pl.Duration, "*"),701(pl.Duration, pl.Datetime, "*"),702(pl.Date, pl.Duration, "*"),703(pl.Duration, pl.Date, "*"),704(pl.Time, pl.Duration, "*"),705(pl.Duration, pl.Time, "*"),706],707)708def test_raise_invalid_temporal(a: pl.DataType, b: pl.DataType, op: str) -> None:709a = pl.Series("a", [], dtype=a) # type: ignore[assignment]710b = pl.Series("b", [], dtype=b) # type: ignore[assignment]711_df = pl.DataFrame([a, b])712713with pytest.raises(InvalidOperationError):714eval(f"_df.select(pl.col('a') {op} pl.col('b'))")715716717def test_arithmetic_duration_div_multiply() -> None:718df = pl.DataFrame([pl.Series("a", [100, 200, 3000], dtype=pl.Duration)])719720q = df.lazy().with_columns(721b=pl.col("a") / 2,722c=pl.col("a") / 2.5,723d=pl.col("a") * 2,724e=pl.col("a") * 2.5,725f=pl.col("a") / pl.col("a"), # a constant float726)727assert q.collect_schema() == pl.Schema(728[729("a", pl.Duration(time_unit="us")),730("b", pl.Duration(time_unit="us")),731("c", pl.Duration(time_unit="us")),732("d", pl.Duration(time_unit="us")),733("e", pl.Duration(time_unit="us")),734("f", pl.Float64()),735]736)737assert q.collect().to_dict(as_series=False) == {738"a": [739timedelta(microseconds=100),740timedelta(microseconds=200),741timedelta(microseconds=3000),742],743"b": [744timedelta(microseconds=50),745timedelta(microseconds=100),746timedelta(microseconds=1500),747],748"c": [749timedelta(microseconds=40),750timedelta(microseconds=80),751timedelta(microseconds=1200),752],753"d": [754timedelta(microseconds=200),755timedelta(microseconds=400),756timedelta(microseconds=6000),757],758"e": [759timedelta(microseconds=250),760timedelta(microseconds=500),761timedelta(microseconds=7500),762],763"f": [1.0, 1.0, 1.0],764}765766# rhs767768q = df.lazy().with_columns(769b=2 * pl.col("a"),770c=2.5 * pl.col("a"),771)772assert q.collect_schema() == pl.Schema(773[774("a", pl.Duration(time_unit="us")),775("b", pl.Duration(time_unit="us")),776("c", pl.Duration(time_unit="us")),777]778)779assert q.collect().to_dict(as_series=False) == {780"a": [781timedelta(microseconds=100),782timedelta(microseconds=200),783timedelta(microseconds=3000),784],785"b": [786timedelta(microseconds=200),787timedelta(microseconds=400),788timedelta(microseconds=6000),789],790"c": [791timedelta(microseconds=250),792timedelta(microseconds=500),793timedelta(microseconds=7500),794],795}796797798def test_invalid_shapes_err() -> None:799with pytest.raises(800InvalidOperationError,801match=r"cannot do arithmetic operation on series of different lengths: got 2 and 3",802):803pl.Series([1, 2]) + pl.Series([1, 2, 3])804805806def test_date_datetime_sub() -> None:807df = pl.DataFrame({"foo": [date(2020, 1, 1)], "bar": [datetime(2020, 1, 5)]})808809assert df.select(810pl.col("foo") - pl.col("bar"),811pl.col("bar") - pl.col("foo"),812).to_dict(as_series=False) == {813"foo": [timedelta(days=-4)],814"bar": [timedelta(days=4)],815}816817818def test_time_time_sub() -> None:819df = pl.DataFrame(820{821"foo": pl.Series([-1, 0, 10]).cast(pl.Datetime("us")),822"bar": pl.Series([1, 0, 1]).cast(pl.Datetime("us")),823}824)825826assert df.select(827pl.col("foo").dt.time() - pl.col("bar").dt.time(),828pl.col("bar").dt.time() - pl.col("foo").dt.time(),829).to_dict(as_series=False) == {830"foo": [831timedelta(days=1, microseconds=-2),832timedelta(0),833timedelta(microseconds=9),834],835"bar": [836timedelta(days=-1, microseconds=2),837timedelta(0),838timedelta(microseconds=-9),839],840}841842843def test_raise_invalid_shape() -> None:844with pytest.raises(InvalidOperationError):845pl.DataFrame([[1, 2], [3, 4]]) * pl.DataFrame([1, 2, 3])846847848def test_integer_divide_scalar_zero_lhs_19142() -> None:849assert_series_equal(pl.Series([0]) // pl.Series([1, 0]), pl.Series([0, None]))850assert_series_equal(pl.Series([0]) % pl.Series([1, 0]), pl.Series([0, None]))851852853def test_compound_duration_21389() -> None:854# test add855lf = pl.LazyFrame(856{857"ts": datetime(2024, 1, 1, 1, 2, 3),858"duration": timedelta(days=1),859}860)861result = lf.select(pl.col("ts") + pl.col("duration") * 2)862expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})863expected = pl.DataFrame({"ts": datetime(2024, 1, 3, 1, 2, 3)})864assert result.collect_schema() == expected_schema865assert_frame_equal(result.collect(), expected)866867# test subtract868result = lf.select(pl.col("ts") - pl.col("duration") * 2)869expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})870expected = pl.DataFrame({"ts": datetime(2023, 12, 30, 1, 2, 3)})871assert result.collect_schema() == expected_schema872assert_frame_equal(result.collect(), expected)873874875@pytest.mark.parametrize("dtype", INTEGER_DTYPES)876def test_arithmetic_i128(dtype: PolarsIntegerType) -> None:877s = pl.Series("a", [0, 1, 127], dtype=dtype, strict=False)878s128 = pl.Series("a", [0, 0, 0], dtype=pl.Int128)879expected = pl.Series("a", [0, 1, 127], dtype=pl.Int128)880assert_series_equal(s + s128, expected)881assert_series_equal(s128 + s, expected)882883884def test_arithmetic_i128_nonint() -> None:885s128 = pl.Series("a", [0], dtype=pl.Int128)886887s = pl.Series("a", [1.0], dtype=pl.Float32)888assert_series_equal(s + s128, pl.Series("a", [1.0], dtype=pl.Float64))889assert_series_equal(s128 + s, pl.Series("a", [1.0], dtype=pl.Float64))890891s = pl.Series("a", [1.0], dtype=pl.Float64)892assert_series_equal(s + s128, s)893assert_series_equal(s128 + s, s)894895s = pl.Series("a", [True], dtype=pl.Boolean)896assert_series_equal(s + s128, pl.Series("a", [1], dtype=pl.Int128))897assert_series_equal(s128 + s, pl.Series("a", [1], dtype=pl.Int128))898899900@pytest.mark.parametrize("dtype", INTEGER_DTYPES)901def test_arithmetic_u128(dtype: PolarsIntegerType) -> None:902s = pl.Series("a", [0, 1, 127], dtype=dtype, strict=False)903s128 = pl.Series("a", [0, 0, 0], dtype=pl.UInt128)904expected_dtype = pl.UInt128 if dtype in UNSIGNED_INTEGER_DTYPES else pl.Int128905expected = pl.Series("a", [0, 1, 127], dtype=expected_dtype)906assert_series_equal(s + s128, expected)907assert_series_equal(s128 + s, expected)908909910def test_arithmetic_u128_nonint() -> None:911s128 = pl.Series("a", [0], dtype=pl.UInt128)912913s = pl.Series("a", [1.0], dtype=pl.Float32)914assert_series_equal(s + s128, pl.Series("a", [1.0], dtype=pl.Float64))915assert_series_equal(s128 + s, pl.Series("a", [1.0], dtype=pl.Float64))916917s = pl.Series("a", [1.0], dtype=pl.Float64)918assert_series_equal(s + s128, s)919assert_series_equal(s128 + s, s)920921s = pl.Series("a", [True], dtype=pl.Boolean)922assert_series_equal(s + s128, pl.Series("a", [1], dtype=pl.UInt128))923assert_series_equal(s128 + s, pl.Series("a", [1], dtype=pl.UInt128))924925926def test_float_truediv_output_type() -> None:927lf = pl.LazyFrame(schema={"f32": pl.Float32, "f64": pl.Float64})928assert lf.select(x=pl.col("f32") / pl.col("f32")).collect_schema() == pl.Schema(929{"x": pl.Float32}930)931assert lf.select(x=pl.col("f32") / pl.col("f64")).collect_schema() == pl.Schema(932{"x": pl.Float64}933)934assert lf.select(x=pl.col("f64") / pl.col("f32")).collect_schema() == pl.Schema(935{"x": pl.Float64}936)937assert lf.select(x=pl.col("f64") / pl.col("f64")).collect_schema() == pl.Schema(938{"x": pl.Float64}939)940941942@pytest.mark.parametrize(943"dtype",944[945pl.Float64,946pl.Int32,947pl.Decimal(21, 3),948],949)950def test_log_exp(dtype: pl.DataType) -> None:951df = pl.DataFrame(952{953"a": pl.Series("a", [1, 100, 1000], dtype=dtype),954"b": pl.Series("a", [0, 2, 3], dtype=dtype),955}956)957958result = df.lazy().select(959log10=pl.col("a").log10(),960log=pl.col("a").log(),961exp=pl.col("b").exp(),962log1p=pl.col("a").log1p(),963)964expected = df.select(965log10=pl.col("b").cast(pl.Float64),966log=pl.Series(np.log(df["a"].cast(pl.Float64).to_numpy())),967exp=pl.Series(np.exp(df["b"].cast(pl.Float64).to_numpy())),968log1p=pl.Series(np.log1p(df["a"].cast(pl.Float64).to_numpy())),969)970971assert_frame_equal(result.collect(), expected)972assert result.collect_schema() == expected.schema973974975@pytest.mark.parametrize(976"dtype",977[978pl.Float64,979pl.Float32,980],981)982def test_log_broadcast(dtype: pl.DataType) -> None:983a = pl.Series("a", [1, 3, 9, 27, 81], dtype=dtype)984b = pl.Series("a", [3, 3, 9, 3, 9], dtype=dtype)985986assert_series_equal(a.log(b), pl.Series("a", [0, 1, 1, 3, 2], dtype=dtype))987assert_series_equal(988a.log(pl.Series("a", [3], dtype=dtype)),989pl.Series("a", [0, 1, 2, 3, 4], dtype=dtype),990)991assert_series_equal(992pl.Series("a", [81], dtype=dtype).log(b),993pl.Series("a", [4, 4, 2, 4, 2], dtype=dtype),994)995996997