# GitHub Repository: pola-rs/polars
# Path: blob/main/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
from __future__ import annotations

import operator
from collections import OrderedDict
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Any, Callable

import numpy as np
import pytest

import polars as pl
from polars import (
    Date,
    Float64,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
)
from polars.exceptions import ColumnNotFoundError, InvalidOperationError
from polars.testing import assert_frame_equal, assert_series_equal
from tests.unit.conftest import INTEGER_DTYPES, NUMERIC_DTYPES

if TYPE_CHECKING:
    from polars._typing import PolarsIntegerType


def test_sqrt_neg_inf() -> None:
    out = pl.DataFrame(
        {
            "val": [float("-Inf"), -9, 0, 9, float("Inf")],
        }
    ).with_columns(pl.col("val").sqrt().alias("sqrt"))
    # compare NaNs and infinities by string value, since NaN never compares equal to itself
    assert str(out["sqrt"].to_list()) == str(
        [float("nan"), float("nan"), 0.0, 3.0, float("Inf")]
    )


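# Note on test_sqrt_neg_inf above: `sqrt` of a negative value (including -inf) yields NaN,
# and NaN != NaN under IEEE 754, so a plain list equality would fail even when the result
# is correct; formatting both sides as strings sidesteps that. An illustrative sketch of
# the same check without string formatting (not part of the original suite):
#
#     import math
#     values = out["sqrt"].to_list()
#     assert math.isnan(values[0]) and math.isnan(values[1])
#     assert values[2:] == [0.0, 3.0, float("inf")]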
def test_arithmetic_with_logical_on_series_4920() -> None:
    assert (pl.Series([date(2022, 6, 3)]) - date(2022, 1, 1)).dtype == pl.Duration("us")


@pytest.mark.parametrize(
    ("left", "right", "expected_value", "expected_dtype"),
    [
        (date(2021, 1, 1), date(2020, 1, 1), timedelta(days=366), pl.Duration("us")),
        (
            datetime(2021, 1, 1),
            datetime(2020, 1, 1),
            timedelta(days=366),
            pl.Duration("us"),
        ),
        (timedelta(days=1), timedelta(days=2), timedelta(days=-1), pl.Duration("us")),
        (2.0, 3.0, -1.0, pl.Float64),
    ],
)
def test_arithmetic_sub(
    left: object, right: object, expected_value: object, expected_dtype: pl.DataType
) -> None:
    result = left - pl.Series([right])
    expected = pl.Series("", [expected_value], dtype=expected_dtype)
    assert_series_equal(result, expected)
    result = pl.Series([left]) - right
    assert_series_equal(result, expected)


def test_struct_arithmetic() -> None:
    df = pl.DataFrame(
        {
            "a": [1, 2],
            "b": [3, 4],
            "c": [5, 6],
        }
    ).select(pl.cum_sum_horizontal("a", "c"))

    q = df.lazy().select(pl.col("cum_sum") * 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 2, "c": 12}, {"a": 4, "c": 16}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") - 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": -1, "c": 4}, {"a": 0, "c": 6}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") + 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 3, "c": 8}, {"a": 4, "c": 10}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") / 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 0.5, "c": 3.0}, {"a": 1.0, "c": 4.0}]
    }
    assert q.collect_schema() == out.schema

    q = df.lazy().select(pl.col("cum_sum") // 2)
    out = q.collect()
    assert out.to_dict(as_series=False) == {
        "cum_sum": [{"a": 0, "c": 3}, {"a": 1, "c": 4}]
    }
    assert q.collect_schema() == out.schema

    # inline, this checks cum_sum reports the right output type
    assert pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).select(
        pl.cum_sum_horizontal("a", "c") * 3
    ).to_dict(as_series=False) == {"cum_sum": [{"a": 3, "c": 18}, {"a": 6, "c": 24}]}


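# Note on test_struct_arithmetic above: `cum_sum_horizontal` returns a single Struct
# column named "cum_sum", and the scalar arithmetic (`* 2`, `- 2`, `/ 2`, `// 2`) is
# applied to every field of that struct, which is why each expected dict keeps the
# field names "a" and "c". The `collect_schema()` assertions confirm the lazy schema
# matches the field dtypes that actually materialize after `collect()`.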
def test_simd_float_sum_determinism() -> None:
    out = []
    for _ in range(10):
        a = pl.Series(
            [
                0.021415853782953836,
                0.06234123511682772,
                0.016962384922753124,
                0.002595968402539279,
                0.007632765529696731,
                0.012105848332077212,
                0.021439787151032317,
                0.3223049133700719,
                0.10526670729539435,
                0.0859029285522487,
            ]
        )
        out.append(a.sum())

    assert out == [
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
        0.6579683924555951,
    ]


def test_floor_division_float_int_consistency() -> None:
    a = np.random.randn(10) * 10

    assert (pl.Series(a) // 5).to_list() == list(a // 5)
    assert (pl.Series(a, dtype=pl.Int32) // 5).to_list() == list(
        (a.astype(int) // 5).astype(int)
    )


def test_series_expr_arithm() -> None:
    s = pl.Series([1, 2, 3])
    assert (s + pl.col("a")).meta == pl.lit(s) + pl.col("a")
    assert (s - pl.col("a")).meta == pl.lit(s) - pl.col("a")
    assert (s / pl.col("a")).meta == pl.lit(s) / pl.col("a")
    assert (s // pl.col("a")).meta == pl.lit(s) // pl.col("a")
    assert (s * pl.col("a")).meta == pl.lit(s) * pl.col("a")
    assert (s % pl.col("a")).meta == pl.lit(s) % pl.col("a")


def test_fused_arithm() -> None:
    df = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [10, 20, 30],
            "c": [5, 5, 5],
        }
    )

    q = df.lazy().select(
        pl.col("a") * pl.col("b") + pl.col("c"),
        (pl.col("a") + pl.col("b") * pl.col("c")).alias("2"),
    )
    # the extra aliases are there because the fma does operation reordering
    assert (
        """col("c").fma([col("a"), col("b")]).alias("a"), col("a").fma([col("b"), col("c")]).alias("2")"""
        in q.explain()
    )
    assert q.collect().to_dict(as_series=False) == {
        "a": [15, 45, 95],
        "2": [51, 102, 153],
    }
    # fsm
    q = df.lazy().select(pl.col("a") - pl.col("b") * pl.col("c"))
    assert """col("a").fsm([col("b"), col("c")])""" in q.explain()
    assert q.collect()["a"].to_list() == [-49, -98, -147]
    # fms
    q = df.lazy().select(pl.col("a") * pl.col("b") - pl.col("c"))
    assert """col("a").fms([col("b"), col("c")])""" in q.explain()
    assert q.collect()["a"].to_list() == [5, 35, 85]

    # check that we constant-fold instead of emitting an fma
    q = df.lazy().select(pl.lit(1) * pl.lit(2) - pl.col("c"))
    assert """(2) - (col("c")""" in q.explain()

    # check that fusing is turned off for literals, see: #9857
    for expr in [
        pl.col("c") * 2 + 5,
        pl.col("c") * 2 + pl.col("c"),
        pl.col("c") * 2 - 5,
        pl.col("c") * 2 - pl.col("c"),
        5 - pl.col("c") * 2,
        pl.col("c") - pl.col("c") * 2,
    ]:
        q = df.lazy().select(expr)
        assert all(el not in q.explain() for el in ["fms", "fsm", "fma"]), (
            f"Fused Arithmetic applied on literal {expr}: {q.explain()}"
        )


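# Note on test_fused_arithm above: the optimizer rewrites `a * b + c` style expressions
# into fused ops (`fma`, `fsm`, `fms`), and `LazyFrame.explain()` returns the optimized
# plan as a plain string, so fusion can be spot-checked the same way outside a test.
# Illustrative sketch, assuming a frame like the `df` above:
#
#     plan = df.lazy().select(pl.col("a") * pl.col("b") + pl.col("c")).explain()
#     assert "fma" in plan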
def test_literal_no_upcast() -> None:
    df = pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Float32)})

    q = (
        df.lazy()
        .select(
            (pl.col("a") * -5 + 2).alias("fma"),
            (2 - pl.col("a") * 5).alias("fsm"),
            (pl.col("a") * 5 - 2).alias("fms"),
        )
        .collect()
    )
    assert set(q.schema.values()) == {pl.Float32}, (
        "Literal * Column (Float32) should not lead to an upcast"
    )


def test_boolean_addition() -> None:
    s = pl.DataFrame(
        {"a": [True, False, False], "b": [True, False, True]}
    ).sum_horizontal()

    assert s.dtype == pl.get_index_type()
    assert s.to_list() == [2, 0, 1]
    df = pl.DataFrame(
        {"a": [True], "b": [False]},
    ).select(pl.sum_horizontal("a", "b"))
    assert df.dtypes == [pl.get_index_type()]


def test_bitwise_6311() -> None:
    df = pl.DataFrame({"col1": [0, 1, 2, 3], "flag": [0, 0, 0, 0]})

    assert (
        df.with_columns(
            pl.when((pl.col("col1") < 1) | (pl.col("col1") >= 3))
            .then(pl.col("flag") | 2)  # set flag b0010
            .otherwise(pl.col("flag"))
        ).with_columns(
            pl.when(pl.col("col1") > -1)
            .then(pl.col("flag") | 4)
            .otherwise(pl.col("flag"))
        )
    ).to_dict(as_series=False) == {"col1": [0, 1, 2, 3], "flag": [6, 4, 4, 6]}


def test_arithmetic_null_count() -> None:
    df = pl.DataFrame({"a": [1, None, 2], "b": [None, 2, 1]})
    out = df.select(
        no_broadcast=pl.col("a") + pl.col("b"),
        broadcast_left=1 + pl.col("b"),
        broadcast_right=pl.col("a") + 1,
    )
    assert out.null_count().to_dict(as_series=False) == {
        "no_broadcast": [2],
        "broadcast_left": [1],
        "broadcast_right": [1],
    }


@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.floordiv,
        operator.mod,
        operator.mul,
        operator.sub,
    ],
)
@pytest.mark.parametrize("dtype", NUMERIC_DTYPES)
def test_operator_arithmetic_with_nulls(op: Any, dtype: pl.DataType) -> None:
    df = pl.DataFrame({"n": [2, 3]}, schema={"n": dtype})
    s = df.to_series()

    df_expected = pl.DataFrame({"n": [None, None]}, schema={"n": dtype})
    s_expected = df_expected.to_series()

    # validate expr, frame, and series behaviour with null value arithmetic
    op_name = op.__name__
    for null_expr in (None, pl.lit(None)):
        assert_frame_equal(df_expected, df.select(op(pl.col("n"), null_expr)))
        assert_frame_equal(
            df_expected, df.select(getattr(pl.col("n"), op_name)(null_expr))
        )

    assert_frame_equal(op(df, None), df_expected)
    assert_series_equal(op(s, None), s_expected)


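# Note on test_operator_arithmetic_with_nulls above: arithmetic with a null operand
# propagates null for every supported operator while preserving the column dtype.
# A minimal sketch of the same rule applied directly to a Series; the helper name
# `_null_propagation_sketch` is illustrative only and is not collected by pytest.
def _null_propagation_sketch() -> None:
    s = pl.Series("n", [2, 3], dtype=pl.Int64)
    result = s + None
    # every element becomes null, but the Int64 dtype is kept
    assert result.null_count() == 2
    assert result.dtype == pl.Int64

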
@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.mod,
        operator.mul,
        operator.sub,
    ],
)
def test_null_column_arithmetic(op: Any) -> None:
    df = pl.DataFrame({"a": [None, None], "b": [None, None]})
    expected_df = pl.DataFrame({"a": [None, None]})

    output_df = df.select(op(pl.col("a"), pl.col("b")))
    assert_frame_equal(expected_df, output_df)
    # test broadcast right
    output_df = df.select(op(pl.col("a"), pl.Series([None])))
    assert_frame_equal(expected_df, output_df)
    # test broadcast left
    output_df = df.select(op(pl.Series("a", [None]), pl.col("a")))
    assert_frame_equal(expected_df, output_df)


def test_bool_floordiv() -> None:
    df = pl.DataFrame({"x": [True]})

    with pytest.raises(
        InvalidOperationError,
        match="floor_div operation not supported for dtype `bool`",
    ):
        df.with_columns(pl.col("x").floordiv(2))


def test_arithmetic_in_aggregation_3739() -> None:
    def demean_dot() -> pl.Expr:
        x = pl.col("x")
        y = pl.col("y")
        x1 = x - x.mean()
        y1 = y - y.mean()
        return (x1 * y1).sum().alias("demean_dot")

    assert (
        pl.DataFrame(
            {
                "key": ["a", "a", "a", "a"],
                "x": [4, 2, 2, 4],
                "y": [2, 0, 2, 0],
            }
        )
        .group_by("key")
        .agg(
            [
                demean_dot(),
            ]
        )
    ).to_dict(as_series=False) == {"key": ["a"], "demean_dot": [0.0]}


def test_arithmetic_on_df() -> None:
    df = pl.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})

    for df_mul in (df * 2, 2 * df):
        expected = pl.DataFrame({"a": [2.0, 4.0], "b": [6.0, 8.0]})
        assert_frame_equal(df_mul, expected)

    for df_plus in (df + 2, 2 + df):
        expected = pl.DataFrame({"a": [3.0, 4.0], "b": [5.0, 6.0]})
        assert_frame_equal(df_plus, expected)

    df_div = df / 2
    expected = pl.DataFrame({"a": [0.5, 1.0], "b": [1.5, 2.0]})
    assert_frame_equal(df_div, expected)

    df_minus = df - 2
    expected = pl.DataFrame({"a": [-1.0, 0.0], "b": [1.0, 2.0]})
    assert_frame_equal(df_minus, expected)

    df_mod = df % 2
    expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]})
    assert_frame_equal(df_mod, expected)

    df2 = pl.DataFrame({"c": [10]})

    out = df + df2
    expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df - df2
    expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df / df2
    expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df * df2
    expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    out = df % df2
    expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_columns(
        pl.col("b").cast(pl.Float64)
    )
    assert_frame_equal(out, expected)

    # cannot do arithmetic with a sequence
    with pytest.raises(TypeError, match="operation not supported"):
        _ = df + [1]  # type: ignore[operator]


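# Note on test_arithmetic_on_df above: DataFrame-with-DataFrame arithmetic is positional,
# not name-based. `df` has two columns and two rows while `df2` has one column and one
# row, so only the first cell of the first column lines up; every position without a
# counterpart comes back null (still Float64), which is what the expected frames encode.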
def test_df_series_division() -> None:
    df = pl.DataFrame(
        {
            "a": [2, 2, 4, 4, 6, 6],
            "b": [2, 2, 10, 5, 6, 6],
        }
    )
    s = pl.Series([2, 2, 2, 2, 2, 2])
    assert (df / s).to_dict(as_series=False) == {
        "a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
        "b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0],
    }
    assert (df // s).to_dict(as_series=False) == {
        "a": [1, 1, 2, 2, 3, 3],
        "b": [1, 1, 5, 2, 3, 3],
    }


@pytest.mark.parametrize(
    "s", [pl.Series([1, 2], dtype=Int64), pl.Series([1, 2], dtype=Float64)]
)
def test_arithmetic_series(s: pl.Series) -> None:
    a = s
    b = s

    assert ((a * b) == [1, 4]).sum() == 2
    assert ((a / b) == [1.0, 1.0]).sum() == 2
    assert ((a + b) == [2, 4]).sum() == 2
    assert ((a - b) == [0, 0]).sum() == 2
    assert ((a + 1) == [2, 3]).sum() == 2
    assert ((a - 1) == [0, 1]).sum() == 2
    assert ((a / 1) == [1.0, 2.0]).sum() == 2
    assert ((a // 2) == [0, 1]).sum() == 2
    assert ((a * 2) == [2, 4]).sum() == 2
    assert ((2 + a) == [3, 4]).sum() == 2
    assert ((1 - a) == [0, -1]).sum() == 2
    assert ((2 * a) == [2, 4]).sum() == 2

    # integer division
    assert_series_equal(1 / a, pl.Series([1.0, 0.5]))
    expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5])
    assert_series_equal(1 // a, expected)
    # modulo
    assert ((1 % a) == [0, 1]).sum() == 2
    assert ((a % 1) == [0, 0]).sum() == 2
    # negate
    assert (-a == [-1, -2]).sum() == 2
    # unary plus
    assert (+a == a).all()
    # wrong dtypes in rhs operands
    assert ((1.0 - a) == [0.0, -1.0]).sum() == 2
    assert ((1.0 / a) == [1.0, 0.5]).sum() == 2
    assert ((1.0 * a) == [1, 2]).sum() == 2
    assert ((1.0 + a) == [2, 3]).sum() == 2
    assert ((1.0 % a) == [0, 1]).sum() == 2


489
def test_arithmetic_datetime() -> None:
490
a = pl.Series("a", [datetime(2021, 1, 1)])
491
with pytest.raises(TypeError):
492
a // 2
493
with pytest.raises(TypeError):
494
a / 2
495
with pytest.raises(TypeError):
496
a * 2
497
with pytest.raises(TypeError):
498
a % 2
499
with pytest.raises(
500
InvalidOperationError,
501
):
502
a**2
503
with pytest.raises(TypeError):
504
2 / a
505
with pytest.raises(TypeError):
506
2 // a
507
with pytest.raises(TypeError):
508
2 * a
509
with pytest.raises(TypeError):
510
2 % a
511
with pytest.raises(
512
InvalidOperationError,
513
):
514
2**a
515
516
517
def test_power_series() -> None:
518
a = pl.Series([1, 2], dtype=Int64)
519
b = pl.Series([None, 2.0], dtype=Float64)
520
c = pl.Series([date(2020, 2, 28), date(2020, 3, 1)], dtype=Date)
521
d = pl.Series([1, 2], dtype=UInt8)
522
e = pl.Series([1, 2], dtype=Int8)
523
f = pl.Series([1, 2], dtype=UInt16)
524
g = pl.Series([1, 2], dtype=Int16)
525
h = pl.Series([1, 2], dtype=UInt32)
526
i = pl.Series([1, 2], dtype=Int32)
527
j = pl.Series([1, 2], dtype=UInt64)
528
k = pl.Series([1, 2], dtype=Int64)
529
m = pl.Series([2**33, 2**33], dtype=UInt64)
530
531
# pow
532
assert_series_equal(a**2, pl.Series([1, 4], dtype=Int64))
533
assert_series_equal(b**3, pl.Series([None, 8.0], dtype=Float64))
534
assert_series_equal(a**a, pl.Series([1, 4], dtype=Int64))
535
assert_series_equal(b**b, pl.Series([None, 4.0], dtype=Float64))
536
assert_series_equal(a**b, pl.Series([None, 4.0], dtype=Float64))
537
assert_series_equal(d**d, pl.Series([1, 4], dtype=UInt8))
538
assert_series_equal(e**d, pl.Series([1, 4], dtype=Int8))
539
assert_series_equal(f**d, pl.Series([1, 4], dtype=UInt16))
540
assert_series_equal(g**d, pl.Series([1, 4], dtype=Int16))
541
assert_series_equal(h**d, pl.Series([1, 4], dtype=UInt32))
542
assert_series_equal(i**d, pl.Series([1, 4], dtype=Int32))
543
assert_series_equal(j**d, pl.Series([1, 4], dtype=UInt64))
544
assert_series_equal(k**d, pl.Series([1, 4], dtype=Int64))
545
546
with pytest.raises(
547
InvalidOperationError,
548
match="`pow` operation not supported for dtype `null` as exponent",
549
):
550
a ** pl.lit(None)
551
552
with pytest.raises(
553
InvalidOperationError,
554
match="`pow` operation not supported for dtype `date` as base",
555
):
556
c**2
557
with pytest.raises(
558
InvalidOperationError,
559
match="`pow` operation not supported for dtype `date` as exponent",
560
):
561
2**c
562
563
with pytest.raises(ColumnNotFoundError):
564
a ** "hi" # type: ignore[operator]
565
566
# Raising to UInt64: raises if can't be downcast safely to UInt32...
567
with pytest.raises(
568
InvalidOperationError, match="conversion from `u64` to `u32` failed"
569
):
570
a**m
571
# ... but succeeds otherwise.
572
assert_series_equal(a**j, pl.Series([1, 4], dtype=Int64))
573
574
# rpow
575
assert_series_equal(2.0**a, pl.Series(None, [2.0, 4.0], dtype=Float64))
576
assert_series_equal(2**b, pl.Series(None, [None, 4.0], dtype=Float64))
577
578
with pytest.raises(ColumnNotFoundError):
579
"hi" ** a
580
581
# Series.pow() method
582
assert_series_equal(a.pow(2), pl.Series([1, 4], dtype=Int64))
583
584
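# Note on test_power_series above: for integer bases the result dtype follows the base
# (UInt8 ** UInt8 stays UInt8, Int64 ** UInt8 stays Int64), float bases give floats,
# null exponents and temporal operands raise InvalidOperationError, and a UInt64
# exponent is only accepted when its values can be safely downcast to UInt32.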
def test_rpow_name_20071() -> None:
    result = 1 ** pl.Series("a", [1, 2])
    expected = pl.Series("a", [1, 1], pl.Int32)
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    ("expected", "expr", "column_names"),
    [
        (np.array([[2, 4], [6, 8]], dtype=np.int64), lambda a, b: a + b, ("a", "a")),
        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a - b, ("a", "a")),
        (np.array([[1, 4], [9, 16]], dtype=np.int64), lambda a, b: a * b, ("a", "a")),
        (
            np.array([[1.0, 1.0], [1.0, 1.0]], dtype=np.float64),
            lambda a, b: a / b,
            ("a", "a"),
        ),
        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a % b, ("a", "a")),
        (
            np.array([[3, 4], [7, 8]], dtype=np.int64),
            lambda a, b: a + b,
            ("a", "uint8"),
        ),
        # This fails because the code is buggy, see
        # https://github.com/pola-rs/polars/issues/17820
        #
        # (
        #     np.array([[[2, 4]], [[6, 8]]], dtype=np.int64),
        #     lambda a, b: a + b,
        #     ("nested", "nested"),
        # ),
    ],
)
def test_array_arithmetic_same_size(
    expected: Any,
    expr: Callable[[pl.Series | pl.Expr, pl.Series | pl.Expr], pl.Series],
    column_names: tuple[str, str],
) -> None:
    df = pl.DataFrame(
        [
            pl.Series("a", np.array([[1, 2], [3, 4]], dtype=np.int64)),
            pl.Series("uint8", np.array([[2, 2], [4, 4]], dtype=np.uint8)),
            pl.Series("nested", np.array([[[1, 2]], [[3, 4]]], dtype=np.int64)),
        ]
    )
    # Expr-based arithmetic:
    assert_frame_equal(
        df.select(expr(pl.col(column_names[0]), pl.col(column_names[1]))),
        pl.Series(column_names[0], expected).to_frame(),
    )
    # Direct arithmetic on the Series:
    assert_series_equal(
        expr(df[column_names[0]], df[column_names[1]]),
        pl.Series(column_names[0], expected),
    )


def test_schema_owned_arithmetic_5669() -> None:
    df = (
        pl.LazyFrame({"A": [1, 2, 3]})
        .filter(pl.col("A") >= 3)
        .with_columns(-pl.col("A").alias("B"))
        .collect()
    )
    assert df.columns == ["A", "B"]
    assert df.rows() == [(3, -3)]


def test_schema_true_divide_6643() -> None:
    df = pl.DataFrame({"a": [1]})
    a = pl.col("a")
    assert df.lazy().select(a / 2).select(pl.col(pl.Int64)).collect().shape == (0, 0)


def test_literal_subtract_schema_13284() -> None:
    assert (
        pl.LazyFrame({"a": [23, 30]}, schema={"a": pl.UInt8})
        .with_columns(pl.col("a") - pl.lit(1))
        .group_by("a")
        .len()
    ).collect_schema() == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_int_operator_stability(dtype: pl.DataType) -> None:
    s = pl.Series(values=[10], dtype=dtype)
    assert pl.select(pl.lit(s) // 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) + 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) - 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) * 2).dtypes == [dtype]
    assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]


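# Note on test_int_operator_stability above: `+`, `-`, `*`, and `//` with a small integer
# literal keep the original integer dtype, while true division always produces Float64.
# A minimal sketch of the same behaviour for one concrete dtype; the helper name
# `_true_division_dtype_sketch` is illustrative only and is not collected by pytest.
def _true_division_dtype_sketch() -> None:
    s = pl.Series("n", [10], dtype=pl.UInt8)
    # floor division keeps UInt8, true division upcasts to Float64
    assert pl.select(pl.lit(s) // 2).dtypes == [pl.UInt8]
    assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]

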
def test_duration_division_schema() -> None:
    df = pl.DataFrame({"a": [1]})
    q = (
        df.lazy()
        .with_columns(pl.col("a").cast(pl.Duration))
        .select(pl.col("a") / pl.col("a"))
    )

    assert q.collect_schema() == {"a": pl.Float64}
    assert q.collect().to_dict(as_series=False) == {"a": [1.0]}


@pytest.mark.parametrize(
    ("a", "b", "op"),
    [
        (pl.Duration, pl.Int32, "+"),
        (pl.Int32, pl.Duration, "+"),
        (pl.Time, pl.Int32, "+"),
        (pl.Int32, pl.Time, "+"),
        (pl.Date, pl.Int32, "+"),
        (pl.Int32, pl.Date, "+"),
        (pl.Datetime, pl.Duration, "*"),
        (pl.Duration, pl.Datetime, "*"),
        (pl.Date, pl.Duration, "*"),
        (pl.Duration, pl.Date, "*"),
        (pl.Time, pl.Duration, "*"),
        (pl.Duration, pl.Time, "*"),
    ],
)
def test_raise_invalid_temporal(a: pl.DataType, b: pl.DataType, op: str) -> None:
    a = pl.Series("a", [], dtype=a)  # type: ignore[assignment]
    b = pl.Series("b", [], dtype=b)  # type: ignore[assignment]
    _df = pl.DataFrame([a, b])

    with pytest.raises(InvalidOperationError):
        eval(f"_df.select(pl.col('a') {op} pl.col('b'))")


def test_arithmetic_duration_div_multiply() -> None:
    df = pl.DataFrame([pl.Series("a", [100, 200, 3000], dtype=pl.Duration)])

    q = df.lazy().with_columns(
        b=pl.col("a") / 2,
        c=pl.col("a") / 2.5,
        d=pl.col("a") * 2,
        e=pl.col("a") * 2.5,
        f=pl.col("a") / pl.col("a"),  # a constant float
    )
    assert q.collect_schema() == pl.Schema(
        [
            ("a", pl.Duration(time_unit="us")),
            ("b", pl.Duration(time_unit="us")),
            ("c", pl.Duration(time_unit="us")),
            ("d", pl.Duration(time_unit="us")),
            ("e", pl.Duration(time_unit="us")),
            ("f", pl.Float64()),
        ]
    )
    assert q.collect().to_dict(as_series=False) == {
        "a": [
            timedelta(microseconds=100),
            timedelta(microseconds=200),
            timedelta(microseconds=3000),
        ],
        "b": [
            timedelta(microseconds=50),
            timedelta(microseconds=100),
            timedelta(microseconds=1500),
        ],
        "c": [
            timedelta(microseconds=40),
            timedelta(microseconds=80),
            timedelta(microseconds=1200),
        ],
        "d": [
            timedelta(microseconds=200),
            timedelta(microseconds=400),
            timedelta(microseconds=6000),
        ],
        "e": [
            timedelta(microseconds=250),
            timedelta(microseconds=500),
            timedelta(microseconds=7500),
        ],
        "f": [1.0, 1.0, 1.0],
    }

    # rhs: the Duration column as the right-hand operand

    q = df.lazy().with_columns(
        b=2 * pl.col("a"),
        c=2.5 * pl.col("a"),
    )
    assert q.collect_schema() == pl.Schema(
        [
            ("a", pl.Duration(time_unit="us")),
            ("b", pl.Duration(time_unit="us")),
            ("c", pl.Duration(time_unit="us")),
        ]
    )
    assert q.collect().to_dict(as_series=False) == {
        "a": [
            timedelta(microseconds=100),
            timedelta(microseconds=200),
            timedelta(microseconds=3000),
        ],
        "b": [
            timedelta(microseconds=200),
            timedelta(microseconds=400),
            timedelta(microseconds=6000),
        ],
        "c": [
            timedelta(microseconds=250),
            timedelta(microseconds=500),
            timedelta(microseconds=7500),
        ],
    }


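# Note on test_arithmetic_duration_div_multiply above: multiplying or dividing a Duration
# column by a numeric scalar (on either side) keeps the Duration dtype, while dividing a
# Duration by another Duration yields a plain Float64 ratio, as the asserted schemas show.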
def test_invalid_shapes_err() -> None:
    with pytest.raises(
        InvalidOperationError,
        match=r"cannot do arithmetic operation on series of different lengths: got 2 and 3",
    ):
        pl.Series([1, 2]) + pl.Series([1, 2, 3])


def test_date_datetime_sub() -> None:
    df = pl.DataFrame({"foo": [date(2020, 1, 1)], "bar": [datetime(2020, 1, 5)]})

    assert df.select(
        pl.col("foo") - pl.col("bar"),
        pl.col("bar") - pl.col("foo"),
    ).to_dict(as_series=False) == {
        "foo": [timedelta(days=-4)],
        "bar": [timedelta(days=4)],
    }


def test_time_time_sub() -> None:
    df = pl.DataFrame(
        {
            "foo": pl.Series([-1, 0, 10]).cast(pl.Datetime("us")),
            "bar": pl.Series([1, 0, 1]).cast(pl.Datetime("us")),
        }
    )

    assert df.select(
        pl.col("foo").dt.time() - pl.col("bar").dt.time(),
        pl.col("bar").dt.time() - pl.col("foo").dt.time(),
    ).to_dict(as_series=False) == {
        "foo": [
            timedelta(days=1, microseconds=-2),
            timedelta(0),
            timedelta(microseconds=9),
        ],
        "bar": [
            timedelta(days=-1, microseconds=2),
            timedelta(0),
            timedelta(microseconds=-9),
        ],
    }


def test_raise_invalid_shape() -> None:
    with pytest.raises(InvalidOperationError):
        pl.DataFrame([[1, 2], [3, 4]]) * pl.DataFrame([1, 2, 3])


def test_integer_divide_scalar_zero_lhs_19142() -> None:
    assert_series_equal(pl.Series([0]) // pl.Series([1, 0]), pl.Series([0, None]))
    assert_series_equal(pl.Series([0]) % pl.Series([1, 0]), pl.Series([0, None]))


def test_compound_duration_21389() -> None:
    # test add
    lf = pl.LazyFrame(
        {
            "ts": datetime(2024, 1, 1, 1, 2, 3),
            "duration": timedelta(days=1),
        }
    )
    result = lf.select(pl.col("ts") + pl.col("duration") * 2)
    expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})
    expected = pl.DataFrame({"ts": datetime(2024, 1, 3, 1, 2, 3)})
    assert result.collect_schema() == expected_schema
    assert_frame_equal(result.collect(), expected)

    # test subtract
    result = lf.select(pl.col("ts") - pl.col("duration") * 2)
    expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})
    expected = pl.DataFrame({"ts": datetime(2023, 12, 30, 1, 2, 3)})
    assert result.collect_schema() == expected_schema
    assert_frame_equal(result.collect(), expected)


@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_arithmetic_i128(dtype: PolarsIntegerType) -> None:
    s = pl.Series("a", [0, 1, 127], dtype=dtype, strict=False)
    s128 = pl.Series("a", [0, 0, 0], dtype=pl.Int128)
    expected = pl.Series("a", [0, 1, 127], dtype=pl.Int128)
    assert_series_equal(s + s128, expected)
    assert_series_equal(s128 + s, expected)


def test_arithmetic_i128_nonint() -> None:
    s128 = pl.Series("a", [0], dtype=pl.Int128)

    s = pl.Series("a", [1.0], dtype=pl.Float32)
    assert_series_equal(s + s128, pl.Series("a", [1.0], dtype=pl.Float64))
    assert_series_equal(s128 + s, pl.Series("a", [1.0], dtype=pl.Float64))

    s = pl.Series("a", [1.0], dtype=pl.Float64)
    assert_series_equal(s + s128, s)
    assert_series_equal(s128 + s, s)

    s = pl.Series("a", [True], dtype=pl.Boolean)
    assert_series_equal(s + s128, pl.Series("a", [1], dtype=pl.Int128))
    assert_series_equal(s128 + s, pl.Series("a", [1], dtype=pl.Int128))


def test_float_truediv_output_type() -> None:
    lf = pl.LazyFrame(schema={"f32": pl.Float32, "f64": pl.Float64})
    assert lf.select(x=pl.col("f32") / pl.col("f32")).collect_schema() == pl.Schema(
        {"x": pl.Float32}
    )
    assert lf.select(x=pl.col("f32") / pl.col("f64")).collect_schema() == pl.Schema(
        {"x": pl.Float64}
    )
    assert lf.select(x=pl.col("f64") / pl.col("f32")).collect_schema() == pl.Schema(
        {"x": pl.Float64}
    )
    assert lf.select(x=pl.col("f64") / pl.col("f64")).collect_schema() == pl.Schema(
        {"x": pl.Float64}
    )


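# Note on test_float_truediv_output_type above: true division keeps Float32 only when
# both operands are Float32; as soon as one side is Float64 the result is promoted to
# Float64, following the usual floating-point supertype rule.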
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float64,
        pl.Int32,
        pl.Decimal(21, 3),
    ],
)
def test_log_exp(dtype: pl.DataType) -> None:
    df = pl.DataFrame(
        {
            "a": pl.Series("a", [1, 100, 1000], dtype=dtype),
            "b": pl.Series("a", [0, 2, 3], dtype=dtype),
        }
    )

    result = df.lazy().select(
        log10=pl.col("a").log10(),
        log=pl.col("a").log(),
        exp=pl.col("b").exp(),
        log1p=pl.col("a").log1p(),
    )
    expected = df.select(
        log10=pl.col("b").cast(pl.Float64),
        log=pl.Series(np.log(df["a"].cast(pl.Float64).to_numpy())),
        exp=pl.Series(np.exp(df["b"].cast(pl.Float64).to_numpy())),
        log1p=pl.Series(np.log1p(df["a"].cast(pl.Float64).to_numpy())),
    )

    assert_frame_equal(result.collect(), expected)
    assert result.collect_schema() == expected.schema


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float64,
        pl.Float32,
    ],
)
def test_log_broadcast(dtype: pl.DataType) -> None:
    a = pl.Series("a", [1, 3, 9, 27, 81], dtype=dtype)
    b = pl.Series("a", [3, 3, 9, 3, 9], dtype=dtype)

    assert_series_equal(a.log(b), pl.Series("a", [0, 1, 1, 3, 2], dtype=dtype))
    assert_series_equal(
        a.log(pl.Series("a", [3], dtype=dtype)),
        pl.Series("a", [0, 1, 2, 3, 4], dtype=dtype),
    )
    assert_series_equal(
        pl.Series("a", [81], dtype=dtype).log(b),
        pl.Series("a", [4, 4, 2, 4, 2], dtype=dtype),
    )


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float32,
        pl.Int32,
        pl.Int64,
    ],
)
def test_log_broadcast_upcasting(dtype: pl.DataType) -> None:
    a = pl.Series("a", [1, 3, 9, 27, 81], dtype=dtype)
    b = pl.Series("a", [3, 3, 9, 3, 9], dtype=dtype)
    expected = pl.Series("a", [0, 1, 1, 3, 2], dtype=Float64)

    assert_series_equal(a.log(b.cast(Float64)), expected)
    assert_series_equal(a.cast(Float64).log(b), expected)