# Path: blob/main/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
# 6940 views
"""Unit tests for arithmetic on polars Series, DataFrames and expressions."""

from __future__ import annotations

import operator
from collections import OrderedDict
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Any, Callable

import numpy as np
import pytest

import polars as pl
from polars import (
    Date,
    Float64,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
)
from polars.exceptions import ColumnNotFoundError, InvalidOperationError
from polars.testing import assert_frame_equal, assert_series_equal
from tests.unit.conftest import INTEGER_DTYPES, NUMERIC_DTYPES

if TYPE_CHECKING:
    from polars._typing import PolarsIntegerType


def test_sqrt_neg_inf() -> None:
    """sqrt gives NaN for negative inputs (including -inf) and inf for +inf."""
    out = pl.DataFrame(
        {
            "val": [float("-Inf"), -9, 0, 9, float("Inf")],
        }
    ).with_columns(pl.col("val").sqrt().alias("sqrt"))
    # comparing nans and infinities by string value as they are not cmp
    assert str(out["sqrt"].to_list()) == str(
        [float("nan"), float("nan"), 0.0, 3.0, float("Inf")]
    )


def test_arithmetic_with_logical_on_series_4920() -> None:
    """Subtracting a `date` from a date Series yields `Duration("us")`."""
    assert (pl.Series([date(2022, 6, 3)]) - date(2022, 1, 1)).dtype == pl.Duration("us")


@pytest.mark.parametrize(
    ("left", "right", "expected_value", "expected_dtype"),
    [
        (date(2021, 1, 1), date(2020, 1, 1), timedelta(days=366), pl.Duration("us")),
        (
            datetime(2021, 1, 1),
            datetime(2020, 1, 1),
            timedelta(days=366),
            pl.Duration("us"),
        ),
        (timedelta(days=1), timedelta(days=2), timedelta(days=-1), pl.Duration("us")),
        (2.0, 3.0, -1.0, pl.Float64),
    ],
)
def test_arithmetic_sub(
    left: object, right: object, expected_value: object, expected_dtype: pl.DataType
) -> None:
    """`scalar - Series` and `Series - scalar` give the same value and dtype."""
    result = left - pl.Series([right])
    expected = pl.Series("", [expected_value], dtype=expected_dtype)
    assert_series_equal(result, expected)
    result = pl.Series([left]) - right
    assert_series_equal(result, expected)


def test_struct_arithmetic() -> None:
    """Scalar arithmetic applies per struct field; lazy schema matches output."""
    df = pl.DataFrame(
        {
            "a": [1, 2],
            "b": [3, 4],
            "c": [5, 6],
        }
    ).select(pl.cum_sum_horizontal("a", "c"))

    q = df.lazy().select(pl.col("cum_sum") * 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 2, "c": 12}, {"a": 4, "c": 16}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") - 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": -1, "c": 4}, {"a": 0, "c": 6}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") + 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 3, "c": 8}, {"a": 4, "c": 10}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") / 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 0.5, "c": 3.0}, {"a": 1.0, "c": 4.0}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") // 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 0, "c": 3}, {"a": 1, "c": 4}]
    }
    assert q.collect_schema() == out.schema

    # inline, this checks cum_sum reports the right output type
    assert pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).select(
        pl.cum_sum_horizontal("a", "c") * 3
    ).to_dict(as_series=False) == {"cum_sum": [{"a": 3, "c": 18}, {"a": 6, "c": 24}]}


def test_simd_float_sum_determinism() -> None:
    """Summing the same float Series ten times gives the identical value."""
    out = []
    for _ in range(10):
        a = pl.Series(
            [
                0.021415853782953836,
                0.06234123511682772,
                0.016962384922753124,
                0.002595968402539279,
                0.007632765529696731,
                0.012105848332077212,
                0.021439787151032317,
                0.3223049133700719,
                0.10526670729539435,
                0.0859029285522487,
            ]
        )
        out.append(a.sum())

    assert out == [
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
    ]


def test_floor_division_float_int_consistency() -> None:
    """Series floor division matches NumPy for float and Int32 data."""
    # NOTE(review): the input is unseeded random data, so each run uses
    # different values.
    a = np.random.randn(10) * 10

    assert (pl.Series(a) // 5).to_list() == list(a // 5)
    assert (pl.Series(a, dtype=pl.Int32) // 5).to_list() == list(
        (a.astype(int) // 5).astype(int)
    )


def test_series_expr_arithm() -> None:
    """`Series <op> Expr` builds the same expression as `lit(Series) <op> Expr`."""
    s = pl.Series([1, 2, 3])
    assert (s + pl.col("a")).meta == pl.lit(s) + pl.col("a")
    assert (s - pl.col("a")).meta == pl.lit(s) - pl.col("a")
    assert (s / pl.col("a")).meta == pl.lit(s) / pl.col("a")
    assert (s // pl.col("a")).meta == pl.lit(s) // pl.col("a")
    assert (s * pl.col("a")).meta == pl.lit(s) * pl.col("a")
    assert (s % pl.col("a")).meta == pl.lit(s) % pl.col("a")


def test_fused_arithm() -> None:
    """Check when the query plan contains fma/fsm/fms and when it must not."""
    df = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [10, 20, 30],
            "c": [5, 5, 5],
        }
    )

    q = df.lazy().select(
        pl.col("a") * pl.col("b") + pl.col("c"),
        (pl.col("a") + pl.col("b") * pl.col("c")).alias("2"),
    )
    # the extra aliases are because the fma does operation reordering
    assert (
        """col("c").fma([col("a"), col("b")]).alias("a"), col("a").fma([col("b"), col("c")]).alias("2")"""
        in q.explain()
    )
    assert q.collect().to_dict(as_series=False) == {
        "a": [15, 45, 95],
        "2": [51, 102, 153],
    }
    # fsm
    q = df.lazy().select(pl.col("a") - pl.col("b") * pl.col("c"))
    assert """col("a").fsm([col("b"), col("c")])""" in q.explain()
    assert q.collect()["a"].to_list() == [-49, -98, -147]
    # fms
    q = df.lazy().select(pl.col("a") * pl.col("b") - pl.col("c"))
    assert """col("a").fms([col("b"), col("c")])""" in q.explain()
    assert q.collect()["a"].to_list() == [5, 35, 85]

    # check if we constant fold instead of fma
    q = df.lazy().select(pl.lit(1) * pl.lit(2) - pl.col("c"))
    assert """(2) - (col("c")""" in q.explain()

    # Check if fused is turned off for literals see: #9857
    for expr in [
        pl.col("c") * 2 + 5,
        pl.col("c") * 2 + pl.col("c"),
        pl.col("c") * 2 - 5,
        pl.col("c") * 2 - pl.col("c"),
        5 - pl.col("c") * 2,
        pl.col("c") - pl.col("c") * 2,
    ]:
        q = df.lazy().select(expr)
        assert all(el not in q.explain() for el in ["fms", "fsm", "fma"]), (
            f"Fused Arithmetic applied on literal {expr}: {q.explain()}"
        )


def test_literal_no_upcast() -> None:
    """A Float32 column combined with Python literals stays Float32."""
    df = pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Float32)})

    q = (
        df.lazy()
        .select(
            (pl.col("a") * -5 + 2).alias("fma"),
            (2 - pl.col("a") * 5).alias("fsm"),
            (pl.col("a") * 5 - 2).alias("fms"),
        )
        .collect()
    )
    assert set(q.schema.values()) == {pl.Float32}, (
        "Literal * Column (Float32) should not lead upcast"
    )


def test_boolean_addition() -> None:
    """Horizontal sum of boolean columns has the index dtype."""
    s = pl.DataFrame(
        {"a": [True, False, False], "b": [True, False, True]}
    ).sum_horizontal()

    assert s.dtype == pl.get_index_type()
    assert s.to_list() == [2, 0, 1]
    df = pl.DataFrame(
        {"a": [True], "b": [False]},
    ).select(pl.sum_horizontal("a", "b"))
    assert df.dtypes == [pl.get_index_type()]


def test_bitwise_6311() -> None:
    """Bitwise OR inside when/then/otherwise sets the expected flag bits."""
    df = pl.DataFrame({"col1": [0, 1, 2, 3], "flag": [0, 0, 0, 0]})

    assert (
        df.with_columns(
            pl.when((pl.col("col1") < 1) | (pl.col("col1") >= 3))
            .then(pl.col("flag") | 2)  # set flag b0010
            .otherwise(pl.col("flag"))
        ).with_columns(
            pl.when(pl.col("col1") > -1)
            .then(pl.col("flag") | 4)
            .otherwise(pl.col("flag"))
        )
    ).to_dict(as_series=False) == {"col1": [0, 1, 2, 3], "flag": [6, 4, 4, 6]}


def test_arithmetic_null_count() -> None:
    """Null counts are correct for column-column and broadcast addition."""
    df = pl.DataFrame({"a": [1, None, 2], "b": [None, 2, 1]})
    out = df.select(
        no_broadcast=pl.col("a") + pl.col("b"),
        broadcast_left=1 + pl.col("b"),
        broadcast_right=pl.col("a") + 1,
    )
    assert out.null_count().to_dict(as_series=False) == {
        "no_broadcast": [2],
        "broadcast_left": [1],
        "broadcast_right": [1],
    }


@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.floordiv,
        operator.mod,
        operator.mul,
        operator.sub,
    ],
)
@pytest.mark.parametrize("dtype", NUMERIC_DTYPES)
def test_operator_arithmetic_with_nulls(op: Any, dtype: pl.DataType) -> None:
    """Arithmetic with a null operand yields all-null output of the same dtype."""
    df = pl.DataFrame({"n": [2, 3]}, schema={"n": dtype})
    s = df.to_series()

    df_expected = pl.DataFrame({"n": [None, None]}, schema={"n": dtype})
    s_expected = df_expected.to_series()

    # validate expr, frame, and series behaviour with null value arithmetic
    op_name = op.__name__
    for null_expr in (None, pl.lit(None)):
        assert_frame_equal(df_expected, df.select(op(pl.col("n"), null_expr)))
        assert_frame_equal(
            df_expected, df.select(getattr(pl.col("n"), op_name)(null_expr))
        )

    assert_frame_equal(op(df, None), df_expected)
    assert_series_equal(op(s, None), s_expected)


@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.mod,
        operator.mul,
        operator.sub,
    ],
)
def test_null_column_arithmetic(op: Any) -> None:
    """Arithmetic between all-null columns stays all-null, also when broadcast."""
    df = pl.DataFrame({"a": [None, None], "b": [None, None]})
    expected_df = pl.DataFrame({"a": [None, None]})

    output_df = df.select(op(pl.col("a"), pl.col("b")))
    assert_frame_equal(expected_df, output_df)
    # test broadcast right
    output_df = df.select(op(pl.col("a"), pl.Series([None])))
    assert_frame_equal(expected_df, output_df)
    # test broadcast left
    output_df = df.select(op(pl.Series("a", [None]), pl.col("a")))
    assert_frame_equal(expected_df, output_df)


def test_bool_floordiv() -> None:
    """`floordiv` on a boolean column raises `InvalidOperationError`."""
    df = pl.DataFrame({"x": [True]})

    with pytest.raises(
        InvalidOperationError,
        match="floor_div operation not supported for dtype `bool`",
    ):
        df.with_columns(pl.col("x").floordiv(2))


def test_arithmetic_in_aggregation_3739() -> None:
    """Arithmetic on demeaned columns works inside a group_by aggregation."""

    def demean_dot() -> pl.Expr:
        x = pl.col("x")
        y = pl.col("y")
        x1 = x - x.mean()
        y1 = y - y.mean()
        return (x1 * y1).sum().alias("demean_dot")

    assert (
        pl.DataFrame(
            {
                "key": ["a", "a", "a", "a"],
                "x": [4, 2, 2, 4],
                "y": [2, 0, 2, 0],
            }
        )
        .group_by("key")
        .agg(
            [
                demean_dot(),
            ]
        )
    ).to_dict(as_series=False) == {"key": ["a"], "demean_dot": [0.0]}


def test_arithmetic_on_df() -> None:
    """DataFrame arithmetic with scalars and with a differently-shaped frame."""
    df = pl.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})

    for df_mul in (df * 2, 2 * df):
        expected = pl.DataFrame({"a": [2.0, 4.0], "b": [6.0, 8.0]})
        assert_frame_equal(df_mul, expected)

    for df_plus in (df + 2, 2 + df):
        expected = pl.DataFrame({"a": [3.0, 4.0], "b": [5.0, 6.0]})
        assert_frame_equal(df_plus, expected)

    df_div = df / 2
    expected = pl.DataFrame({"a": [0.5, 1.0], "b": [1.5, 2.0]})
    assert_frame_equal(df_div, expected)

    df_minus = df - 2
    expected = pl.DataFrame({"a": [-1.0, 0.0], "b": [1.0, 2.0]})
    assert_frame_equal(df_minus, expected)

    df_mod = df % 2
    expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]})
    assert_frame_equal(df_mod, expected)

    df2 = pl.DataFrame({"c": [10]})

    out = df + df2
    expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df - df2
    expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df / df2
    expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df * df2
    expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df % df2
    expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    # cannot do arithmetic with a sequence
    with pytest.raises(TypeError, match="operation not supported"):
        _ = df + [1]  # type: ignore[operator]


def test_df_series_division() -> None:
    """`DataFrame / Series` and `DataFrame // Series` divide every column."""
    df = pl.DataFrame(
        {
            "a": [2, 2, 4, 4, 6, 6],
            "b": [2, 2, 10, 5, 6, 6],
        }
    )
    s = pl.Series([2, 2, 2, 2, 2, 2])
    assert (df / s).to_dict(as_series=False) == {
        "a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
        "b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0],
    }
    assert (df // s).to_dict(as_series=False) == {
        "a": [1, 1, 2, 2, 3, 3],
        "b": [1, 1, 5, 2, 3, 3],
    }


@pytest.mark.parametrize(
    "s", [pl.Series([1, 2], dtype=Int64), pl.Series([1, 2], dtype=Float64)]
)
def test_arithmetic_series(s: pl.Series) -> None:
    """Binary and unary operators on Int64/Float64 Series with scalars."""
    a = s
    b = s

    assert ((a * b) == [1, 4]).sum() == 2
    assert ((a / b) == [1.0, 1.0]).sum() == 2
    assert ((a + b) == [2, 4]).sum() == 2
    assert ((a - b) == [0, 0]).sum() == 2
    assert ((a + 1) == [2, 3]).sum() == 2
    assert ((a - 1) == [0, 1]).sum() == 2
    assert ((a / 1) == [1.0, 2.0]).sum() == 2
    assert ((a // 2) == [0, 1]).sum() == 2
    assert ((a * 2) == [2, 4]).sum() == 2
    assert ((2 + a) == [3, 4]).sum() == 2
    assert ((1 - a) == [0, -1]).sum() == 2
    assert ((2 * a) == [2, 4]).sum() == 2

    # integer division
    assert_series_equal(1 / a, pl.Series([1.0, 0.5]))
    expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5])
    assert_series_equal(1 // a, expected)
    # modulo
    assert ((1 % a) == [0, 1]).sum() == 2
    assert ((a % 1) == [0, 0]).sum() == 2
    # negate
    assert (-a == [-1, -2]).sum() == 2
    # unary plus
    assert (+a == a).all()
    # wrong dtypes in rhs operands
    assert ((1.0 - a) == [0.0, -1.0]).sum() == 2
    assert ((1.0 / a) == [1.0, 0.5]).sum() == 2
    assert ((1.0 * a) == [1, 2]).sum() == 2
    assert ((1.0 + a) == [2, 3]).sum() == 2
    assert ((1.0 % a) == [0, 1]).sum() == 2


def test_arithmetic_datetime() -> None:
    """Unsupported arithmetic on a datetime Series raises."""
    a = pl.Series("a", [datetime(2021, 1, 1)])
    with pytest.raises(TypeError):
        a // 2
    with pytest.raises(TypeError):
        a / 2
    with pytest.raises(TypeError):
        a * 2
    with pytest.raises(TypeError):
        a % 2
    with pytest.raises(
        InvalidOperationError,
    ):
        a**2
    with pytest.raises(TypeError):
        2 / a
    with pytest.raises(TypeError):
        2 // a
    with pytest.raises(TypeError):
        2 * a
    with pytest.raises(TypeError):
        2 % a
    with pytest.raises(
        InvalidOperationError,
    ):
        2**a


def test_power_series() -> None:
    """Result dtypes and error cases for `**`, reflected `**` and `Series.pow`."""
    a = pl.Series([1, 2], dtype=Int64)
    b = pl.Series([None, 2.0], dtype=Float64)
    c = pl.Series([date(2020, 2, 28), date(2020, 3, 1)], dtype=Date)
    d = pl.Series([1, 2], dtype=UInt8)
    e = pl.Series([1, 2], dtype=Int8)
    f = pl.Series([1, 2], dtype=UInt16)
    g = pl.Series([1, 2], dtype=Int16)
    h = pl.Series([1, 2], dtype=UInt32)
    i = pl.Series([1, 2], dtype=Int32)
    j = pl.Series([1, 2], dtype=UInt64)
    k = pl.Series([1, 2], dtype=Int64)
    m = pl.Series([2**33, 2**33], dtype=UInt64)

    # pow
    assert_series_equal(a**2, pl.Series([1, 4], dtype=Int64))
    assert_series_equal(b**3, pl.Series([None, 8.0], dtype=Float64))
    assert_series_equal(a**a, pl.Series([1, 4], dtype=Int64))
    assert_series_equal(b**b, pl.Series([None, 4.0], dtype=Float64))
    assert_series_equal(a**b, pl.Series([None, 4.0], dtype=Float64))
    assert_series_equal(d**d, pl.Series([1, 4], dtype=UInt8))
    assert_series_equal(e**d, pl.Series([1, 4], dtype=Int8))
    assert_series_equal(f**d, pl.Series([1, 4], dtype=UInt16))
    assert_series_equal(g**d, pl.Series([1, 4], dtype=Int16))
    assert_series_equal(h**d, pl.Series([1, 4], dtype=UInt32))
    assert_series_equal(i**d, pl.Series([1, 4], dtype=Int32))
    assert_series_equal(j**d, pl.Series([1, 4], dtype=UInt64))
    assert_series_equal(k**d, pl.Series([1, 4], dtype=Int64))

    with pytest.raises(
        InvalidOperationError,
        match="`pow` operation not supported for dtype `null` as exponent",
    ):
        a ** pl.lit(None)

    with pytest.raises(
        InvalidOperationError,
        match="`pow` operation not supported for dtype `date` as base",
    ):
        c**2
    with pytest.raises(
        InvalidOperationError,
        match="`pow` operation not supported for dtype `date` as exponent",
    ):
        2**c

    with pytest.raises(ColumnNotFoundError):
        a ** "hi"  # type: ignore[operator]

    # Raising to UInt64: raises if can't be downcast safely to UInt32...
    with pytest.raises(
        InvalidOperationError, match="conversion from `u64` to `u32` failed"
    ):
        a**m
    # ... but succeeds otherwise.
    assert_series_equal(a**j, pl.Series([1, 4], dtype=Int64))

    # rpow
    assert_series_equal(2.0**a, pl.Series(None, [2.0, 4.0], dtype=Float64))
    assert_series_equal(2**b, pl.Series(None, [None, 4.0], dtype=Float64))

    with pytest.raises(ColumnNotFoundError):
        "hi" ** a

    # Series.pow() method
    assert_series_equal(a.pow(2), pl.Series([1, 4], dtype=Int64))


def test_rpow_name_20071() -> None:
    """`scalar ** Series` keeps the Series name."""
    result = 1 ** pl.Series("a", [1, 2])
    expected = pl.Series("a", [1, 1], pl.Int32)
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    ("expected", "expr", "column_names"),
    [
        (np.array([[2, 4], [6, 8]], dtype=np.int64), lambda a, b: a + b, ("a", "a")),
        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a - b, ("a", "a")),
        (np.array([[1, 4], [9, 16]], dtype=np.int64), lambda a, b: a * b, ("a", "a")),
        (
            np.array([[1.0, 1.0], [1.0, 1.0]], dtype=np.float64),
            lambda a, b: a / b,
            ("a", "a"),
        ),
        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a % b, ("a", "a")),
        (
            np.array([[3, 4], [7, 8]], dtype=np.int64),
            lambda a, b: a + b,
            ("a", "uint8"),
        ),
        # This fails because the code is buggy, see
        # https://github.com/pola-rs/polars/issues/17820
        #
        # (
        #     np.array([[[2, 4]], [[6, 8]]], dtype=np.int64),
        #     lambda a, b: a + b,
        #     ("nested", "nested"),
        # ),
    ],
)
def test_array_arithmetic_same_size(
    expected: Any,
    expr: Callable[[pl.Series | pl.Expr, pl.Series | pl.Expr], pl.Series],
    column_names: tuple[str, str],
) -> None:
    """Element-wise arithmetic on columns built from equally-shaped 2D arrays."""
    df = pl.DataFrame(
        [
            pl.Series("a", np.array([[1, 2], [3, 4]], dtype=np.int64)),
            pl.Series("uint8", np.array([[2, 2], [4, 4]], dtype=np.uint8)),
            pl.Series("nested", np.array([[[1, 2]], [[3, 4]]], dtype=np.int64)),
        ]
    )
    # Expr-based arithmetic:
    assert_frame_equal(
        df.select(expr(pl.col(column_names[0]), pl.col(column_names[1]))),
        pl.Series(column_names[0], expected).to_frame(),
    )
    # Direct arithmetic on the Series:
    assert_series_equal(
        expr(df[column_names[0]], df[column_names[1]]),
        pl.Series(column_names[0], expected),
    )


def test_schema_owned_arithmetic_5669() -> None:
    """Negating an aliased column after a filter yields columns A and B."""
    df = (
        pl.LazyFrame({"A": [1, 2, 3]})
        .filter(pl.col("A") >= 3)
        .with_columns(-pl.col("A").alias("B"))
        .collect()
    )
    assert df.columns == ["A", "B"]
    assert df.rows() == [(3, -3)]


def test_schema_true_divide_6643() -> None:
    """After `/` on an integer column, selecting Int64 columns yields nothing."""
    df = pl.DataFrame({"a": [1]})
    a = pl.col("a")
    assert df.lazy().select(a / 2).select(pl.col(pl.Int64)).collect().shape == (0, 0)


def test_literal_subtract_schema_13284() -> None:
    """Subtracting a literal from a UInt8 column keeps UInt8 in the lazy schema."""
    assert (
        pl.LazyFrame({"a": [23, 30]}, schema={"a": pl.UInt8})
        .with_columns(pl.col("a") - pl.lit(1))
        .group_by("a")
        .len()
    ).collect_schema() == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_int_operator_stability(dtype: pl.DataType) -> None:
    """Integer dtype is kept for `//`, `+`, `-`, `*`; `/` gives Float64."""
    s = pl.Series(values=[10], dtype=dtype)
    assert pl.select(pl.lit(s) // 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) + 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) - 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) * 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]


def test_duration_division_schema() -> None:
    """Duration / Duration is Float64 in both the lazy schema and the result."""
    df = pl.DataFrame({"a": [1]})
    q = (
        df.lazy()
        .with_columns(pl.col("a").cast(pl.Duration))
        .select(pl.col("a") / pl.col("a"))
    )

    assert q.collect_schema() == {"a": pl.Float64}
    assert q.collect().to_dict(as_series=False) == {"a": [1.0]}


@pytest.mark.parametrize(
    ("a", "b", "op"),
    [
        (pl.Duration, pl.Int32, "+"),
        (pl.Int32, pl.Duration, "+"),
        (pl.Time, pl.Int32, "+"),
        (pl.Int32, pl.Time, "+"),
        (pl.Date, pl.Int32, "+"),
        (pl.Int32, pl.Date, "+"),
        (pl.Datetime, pl.Duration, "*"),
        (pl.Duration, pl.Datetime, "*"),
        (pl.Date, pl.Duration, "*"),
        (pl.Duration, pl.Date, "*"),
        (pl.Time, pl.Duration, "*"),
        (pl.Duration, pl.Time, "*"),
    ],
)
def test_raise_invalid_temporal(a: pl.DataType, b: pl.DataType, op: str) -> None:
    """Invalid temporal operator/dtype combinations raise InvalidOperationError."""
    a = pl.Series("a", [], dtype=a)  # type: ignore[assignment]
    b = pl.Series("b", [], dtype=b)  # type: ignore[assignment]
    _df = pl.DataFrame([a, b])

    with pytest.raises(InvalidOperationError):
        # `op` only ever comes from the fixed parametrize list above.
        eval(f"_df.select(pl.col('a') {op} pl.col('b'))")


def test_arithmetic_duration_div_multiply() -> None:
    """Duration times or divided by a number stays Duration, on either side."""
    df = pl.DataFrame([pl.Series("a", [100, 200, 3000], dtype=pl.Duration)])

    q = df.lazy().with_columns(
        b=pl.col("a") / 2,
        c=pl.col("a") / 2.5,
        d=pl.col("a") * 2,
        e=pl.col("a") * 2.5,
        f=pl.col("a") / pl.col("a"),  # a constant float
    )
    assert q.collect_schema() == pl.Schema(
        [
            ("a", pl.Duration(time_unit="us")),
            ("b", pl.Duration(time_unit="us")),
            ("c", pl.Duration(time_unit="us")),
            ("d", pl.Duration(time_unit="us")),
            ("e", pl.Duration(time_unit="us")),
            ("f", pl.Float64()),
        ]
    )
    assert q.collect().to_dict(as_series=False) == {
        "a": [
            timedelta(microseconds=100),
            timedelta(microseconds=200),
            timedelta(microseconds=3000),
        ],
        "b": [
            timedelta(microseconds=50),
            timedelta(microseconds=100),
            timedelta(microseconds=1500),
        ],
        "c": [
            timedelta(microseconds=40),
            timedelta(microseconds=80),
            timedelta(microseconds=1200),
        ],
        "d": [
            timedelta(microseconds=200),
            timedelta(microseconds=400),
            timedelta(microseconds=6000),
        ],
        "e": [
            timedelta(microseconds=250),
            timedelta(microseconds=500),
            timedelta(microseconds=7500),
        ],
        "f": [1.0, 1.0, 1.0],
    }

    # rhs

    q = df.lazy().with_columns(
        b=2 * pl.col("a"),
        c=2.5 * pl.col("a"),
    )
    assert q.collect_schema() == pl.Schema(
        [
            ("a", pl.Duration(time_unit="us")),
            ("b", pl.Duration(time_unit="us")),
            ("c", pl.Duration(time_unit="us")),
        ]
    )
    assert q.collect().to_dict(as_series=False) == {
        "a": [
            timedelta(microseconds=100),
            timedelta(microseconds=200),
            timedelta(microseconds=3000),
        ],
        "b": [
            timedelta(microseconds=200),
            timedelta(microseconds=400),
            timedelta(microseconds=6000),
        ],
        "c": [
            timedelta(microseconds=250),
            timedelta(microseconds=500),
            timedelta(microseconds=7500),
        ],
    }


def test_invalid_shapes_err() -> None:
    """Adding Series of different lengths raises with a descriptive message."""
    with pytest.raises(
        InvalidOperationError,
        match=r"cannot do arithmetic operation on series of different lengths: got 2 and 3",
    ):
        pl.Series([1, 2]) + pl.Series([1, 2, 3])


def test_date_datetime_sub() -> None:
    """Subtracting date and datetime columns in either order gives timedeltas."""
    df = pl.DataFrame({"foo": [date(2020, 1, 1)], "bar": [datetime(2020, 1, 5)]})

    assert df.select(
        pl.col("foo") - pl.col("bar"),
        pl.col("bar") - pl.col("foo"),
    ).to_dict(as_series=False) == {
        "foo": [timedelta(days=-4)],
        "bar": [timedelta(days=4)],
    }


def test_time_time_sub() -> None:
    """Time minus time yields signed timedeltas."""
    df = pl.DataFrame(
        {
            "foo": pl.Series([-1, 0, 10]).cast(pl.Datetime("us")),
            "bar": pl.Series([1, 0, 1]).cast(pl.Datetime("us")),
        }
    )

    assert df.select(
        pl.col("foo").dt.time() - pl.col("bar").dt.time(),
        pl.col("bar").dt.time() - pl.col("foo").dt.time(),
    ).to_dict(as_series=False) == {
        "foo": [
            timedelta(days=1, microseconds=-2),
            timedelta(0),
            timedelta(microseconds=9),
        ],
        "bar": [
            timedelta(days=-1, microseconds=2),
            timedelta(0),
            timedelta(microseconds=-9),
        ],
    }


def test_raise_invalid_shape() -> None:
    """Multiplying DataFrames with mismatched shapes raises."""
    with pytest.raises(InvalidOperationError):
        pl.DataFrame([[1, 2], [3, 4]]) * pl.DataFrame([1, 2, 3])


def test_integer_divide_scalar_zero_lhs_19142() -> None:
    """Length-1 lhs: integer `//` and `%` by zero yield null."""
    assert_series_equal(pl.Series([0]) // pl.Series([1, 0]), pl.Series([0, None]))
    assert_series_equal(pl.Series([0]) % pl.Series([1, 0]), pl.Series([0, None]))


def test_compound_duration_21389() -> None:
    """datetime +/- (duration * int) has the expected schema and values."""
    # test add
    lf = pl.LazyFrame(
        {
            "ts": datetime(2024, 1, 1, 1, 2, 3),
            "duration": timedelta(days=1),
        }
    )
    result = lf.select(pl.col("ts") + pl.col("duration") * 2)
    expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})
    expected = pl.DataFrame({"ts": datetime(2024, 1, 3, 1, 2, 3)})
    assert result.collect_schema() == expected_schema
    assert_frame_equal(result.collect(), expected)

    # test subtract
    result = lf.select(pl.col("ts") - pl.col("duration") * 2)
    expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})
    expected = pl.DataFrame({"ts": datetime(2023, 12, 30, 1, 2, 3)})
    assert result.collect_schema() == expected_schema
    assert_frame_equal(result.collect(), expected)


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_arithmetic_i128(dtype: PolarsIntegerType) -> None:
    """Adding an integer Series to an Int128 Series yields Int128."""
    s = pl.Series("a", [0, 1, 127], dtype=dtype, strict=False)
    s128 = pl.Series("a", [0, 0, 0], dtype=pl.Int128)
    expected = pl.Series("a", [0, 1, 127], dtype=pl.Int128)
    assert_series_equal(s + s128, expected)
    assert_series_equal(s128 + s, expected)


def test_arithmetic_i128_nonint() -> None:
    """Int128 plus float yields Float64; Int128 plus boolean yields Int128."""
    s128 = pl.Series("a", [0], dtype=pl.Int128)

    s = pl.Series("a", [1.0], dtype=pl.Float32)
    assert_series_equal(s + s128, pl.Series("a", [1.0], dtype=pl.Float64))
    assert_series_equal(s128 + s, pl.Series("a", [1.0], dtype=pl.Float64))

    s = pl.Series("a", [1.0], dtype=pl.Float64)
    assert_series_equal(s + s128, s)
    assert_series_equal(s128 + s, s)

    s = pl.Series("a", [True], dtype=pl.Boolean)
    assert_series_equal(s + s128, pl.Series("a", [1], dtype=pl.Int128))
    assert_series_equal(s128 + s, pl.Series("a", [1], dtype=pl.Int128))


def test_float_truediv_output_type() -> None:
    """f32 / f32 stays Float32; any Float64 operand gives Float64."""
    lf = pl.LazyFrame(schema={"f32": pl.Float32, "f64": pl.Float64})
    assert lf.select(x=pl.col("f32") / pl.col("f32")).collect_schema() == pl.Schema(
        {"x": pl.Float32}
    )
    assert lf.select(x=pl.col("f32") / pl.col("f64")).collect_schema() == pl.Schema(
        {"x": pl.Float64}
    )
    assert lf.select(x=pl.col("f64") / pl.col("f32")).collect_schema() == pl.Schema(
        {"x": pl.Float64}
    )
    assert lf.select(x=pl.col("f64") / pl.col("f64")).collect_schema() == pl.Schema(
        {"x": pl.Float64}
    )


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float64,
        pl.Int32,
        pl.Decimal(21, 3),
    ],
)
def test_log_exp(dtype: pl.DataType) -> None:
    """log10/log/exp/log1p match NumPy on Float64-cast input, in value and schema."""
    df = pl.DataFrame(
        {
            "a": pl.Series("a", [1, 100, 1000], dtype=dtype),
            "b": pl.Series("a", [0, 2, 3], dtype=dtype),
        }
    )

    result = df.lazy().select(
        log10=pl.col("a").log10(),
        log=pl.col("a").log(),
        exp=pl.col("b").exp(),
        log1p=pl.col("a").log1p(),
    )
    expected = df.select(
        # column "b" holds exactly log10 of column "a"
        log10=pl.col("b").cast(pl.Float64),
        log=pl.Series(np.log(df["a"].cast(pl.Float64).to_numpy())),
        exp=pl.Series(np.exp(df["b"].cast(pl.Float64).to_numpy())),
        log1p=pl.Series(np.log1p(df["a"].cast(pl.Float64).to_numpy())),
    )

    assert_frame_equal(result.collect(), expected)
    assert result.collect_schema() == expected.schema


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float64,
        pl.Float32,
    ],
)
def test_log_broadcast(dtype: pl.DataType) -> None:
    """`Series.log` with a Series base, including length-1 broadcasting."""
    a = pl.Series("a", [1, 3, 9, 27, 81], dtype=dtype)
    b = pl.Series("a", [3, 3, 9, 3, 9], dtype=dtype)

    assert_series_equal(a.log(b), pl.Series("a", [0, 1, 1, 3, 2], dtype=dtype))
    assert_series_equal(
        a.log(pl.Series("a", [3], dtype=dtype)),
        pl.Series("a", [0, 1, 2, 3, 4], dtype=dtype),
    )
    assert_series_equal(
        pl.Series("a", [81], dtype=dtype).log(b),
        pl.Series("a", [4, 4, 2, 4, 2], dtype=dtype),
    )


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float32,
        pl.Int32,
        pl.Int64,
    ],
)
def test_log_broadcast_upcasting(dtype: pl.DataType) -> None:
    """`log` with a Float64 operand on either side returns Float64."""
    a = pl.Series("a", [1, 3, 9, 27, 81], dtype=dtype)
    b = pl.Series("a", [3, 3, 9, 3, 9], dtype=dtype)
    expected = pl.Series("a", [0, 1, 1, 3, 2], dtype=Float64)

    assert_series_equal(a.log(b.cast(Float64)), expected)
    assert_series_equal(a.cast(Float64).log(b), expected)