Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
8406 views
1
from __future__ import annotations
2
3
import operator
4
from collections import OrderedDict
5
from datetime import date, datetime, timedelta
6
from typing import TYPE_CHECKING, Any
7
8
import numpy as np
9
import pytest
10
11
import polars as pl
12
from polars import (
13
Date,
14
Float64,
15
Int8,
16
Int16,
17
Int32,
18
Int64,
19
UInt8,
20
UInt16,
21
UInt32,
22
UInt64,
23
)
24
from polars.exceptions import ColumnNotFoundError, InvalidOperationError
25
from polars.testing import assert_frame_equal, assert_series_equal
26
from tests.unit.conftest import INTEGER_DTYPES, NUMERIC_DTYPES, UNSIGNED_INTEGER_DTYPES
27
28
if TYPE_CHECKING:
29
from collections.abc import Callable
30
31
from polars._typing import PolarsIntegerType
32
33
34
def test_sqrt_neg_inf() -> None:
    """Check `sqrt` on negative, zero, positive and infinite inputs."""
    pos_inf = float("Inf")
    nan = float("nan")

    frame = pl.DataFrame({"val": [float("-Inf"), -9, 0, 9, pos_inf]})
    out = frame.with_columns(pl.col("val").sqrt().alias("sqrt"))

    # NaN never compares equal to itself, so the lists are compared through
    # their string renderings instead of element-wise.
    expected = [nan, nan, 0.0, 3.0, pos_inf]
    assert str(out["sqrt"].to_list()) == str(expected)
44
45
46
def test_arithmetic_with_logical_on_series_4920() -> None:
    """A date Series minus a Python `date` has dtype Duration("us")."""
    difference = pl.Series([date(2022, 6, 3)]) - date(2022, 1, 1)
    assert difference.dtype == pl.Duration("us")
48
49
50
@pytest.mark.parametrize(
    ("left", "right", "expected_value", "expected_dtype"),
    [
        pytest.param(
            date(2021, 1, 1),
            date(2020, 1, 1),
            timedelta(days=366),
            pl.Duration("us"),
        ),
        pytest.param(
            datetime(2021, 1, 1),
            datetime(2020, 1, 1),
            timedelta(days=366),
            pl.Duration("us"),
        ),
        pytest.param(
            timedelta(days=1),
            timedelta(days=2),
            timedelta(days=-1),
            pl.Duration("us"),
        ),
        pytest.param(2.0, 3.0, -1.0, pl.Float64),
    ],
)
def test_arithmetic_sub(
    left: object, right: object, expected_value: object, expected_dtype: pl.DataType
) -> None:
    """Subtraction gives the same result with the scalar on either side."""
    expected = pl.Series("", [expected_value], dtype=expected_dtype)

    # Python scalar on the left, Series on the right (reflected operator).
    scalar_minus_series = left - pl.Series([right])
    assert_series_equal(scalar_minus_series, expected)

    # Series on the left, Python scalar on the right.
    series_minus_scalar = pl.Series([left]) - right
    assert_series_equal(series_minus_scalar, expected)
72
73
74
def test_struct_arithmetic() -> None:
    """Apply each arithmetic operator with a scalar to a struct column."""
    source = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    df = source.select(pl.cum_sum_horizontal("a", "c"))

    # (operator applied as `col("cum_sum") <op> 2`, expected struct rows)
    cases = [
        (operator.mul, [{"a": 2, "c": 12}, {"a": 4, "c": 16}]),
        (operator.sub, [{"a": -1, "c": 4}, {"a": 0, "c": 6}]),
        (operator.add, [{"a": 3, "c": 8}, {"a": 4, "c": 10}]),
        (operator.truediv, [{"a": 0.5, "c": 3.0}, {"a": 1.0, "c": 4.0}]),
        (operator.floordiv, [{"a": 0, "c": 3}, {"a": 1, "c": 4}]),
    ]
    for op, expected_rows in cases:
        q = df.lazy().select(op(pl.col("cum_sum"), 2))
        out = q.collect()
        assert out.to_dict(as_series=False) == {"cum_sum": expected_rows}
        # the lazily resolved schema must agree with the materialized one
        assert q.collect_schema() == out.schema

    # inline, this checks cum_sum reports the right output type
    tripled = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).select(
        pl.cum_sum_horizontal("a", "c") * 3
    )
    assert tripled.to_dict(as_series=False) == {
        "cum_sum": [{"a": 3, "c": 18}, {"a": 6, "c": 24}]
    }
122
123
124
def test_simd_float_sum_determinism() -> None:
    """Summing the same float Series ten times gives the same value each time."""
    values = [
        0.021415853782953836,
        0.06234123511682772,
        0.016962384922753124,
        0.002595968402539279,
        0.007632765529696731,
        0.012105848332077212,
        0.021439787151032317,
        0.3223049133700719,
        0.10526670729539435,
        0.0859029285522487,
    ]
    repeats = 10

    # a fresh Series is built on every round, exactly like a repeated call would
    out = [pl.Series(values).sum() for _ in range(repeats)]

    assert out == [0.6579683924555951] * repeats
155
156
157
def test_floor_division_float_int_consistency() -> None:
    """Floor division of float and Int32 Series matches NumPy's results.

    The input is drawn from a seeded generator so that a failure can be
    reproduced; the unseeded global RNG used a different sample on every run.
    """
    rng = np.random.default_rng(seed=0)
    a = rng.standard_normal(10) * 10

    # float floor division
    assert (pl.Series(a) // 5).to_list() == list(a // 5)

    # integer floor division on the same values converted to integers
    assert (pl.Series(a, dtype=pl.Int32) // 5).to_list() == list(
        (a.astype(int) // 5).astype(int)
    )
164
165
166
def test_series_expr_arithm() -> None:
    """`Series <op> Expr` builds the same expression as `lit(Series) <op> Expr`."""
    s = pl.Series([1, 2, 3])
    binary_ops = (
        operator.add,
        operator.sub,
        operator.truediv,
        operator.floordiv,
        operator.mul,
        operator.mod,
    )
    for op in binary_ops:
        from_series = op(s, pl.col("a"))
        from_literal = op(pl.lit(s), pl.col("a"))
        assert from_series.meta == from_literal
174
175
176
def test_fused_arithm() -> None:
    """Check the query plan for fused operations (fma/fsm/fms) and their results."""
    col_a, col_b, col_c = pl.col("a"), pl.col("b"), pl.col("c")
    df = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [10, 20, 30],
            "c": [5, 5, 5],
        }
    )

    # fma
    q = df.lazy().select(
        col_a * col_b + col_c,
        (col_a + col_b * col_c).alias("2"),
    )
    # the extra aliases are because the fma does operation reordering
    fma_plan = """col("c").fma([col("a"), col("b")]).alias("a"), col("a").fma([col("b"), col("c")]).alias("2")"""
    assert fma_plan in q.explain()
    assert q.collect().to_dict(as_series=False) == {
        "a": [15, 45, 95],
        "2": [51, 102, 153],
    }

    # fsm
    q = df.lazy().select(col_a - col_b * col_c)
    assert """col("a").fsm([col("b"), col("c")])""" in q.explain()
    assert q.collect()["a"].to_list() == [-49, -98, -147]

    # fms
    q = df.lazy().select(col_a * col_b - col_c)
    assert """col("a").fms([col("b"), col("c")])""" in q.explain()
    assert q.collect()["a"].to_list() == [5, 35, 85]

    # check if we constant fold instead of fma
    q = df.lazy().select(pl.lit(1) * pl.lit(2) - col_c)
    assert """(2) - (col("c")""" in q.explain()

    # Check if fused is turned off for literals see: #9857
    literal_exprs = [
        col_c * 2 + 5,
        col_c * 2 + col_c,
        col_c * 2 - 5,
        col_c * 2 - col_c,
        5 - col_c * 2,
        col_c - col_c * 2,
    ]
    for expr in literal_exprs:
        plan = df.lazy().select(expr).explain()
        assert not any(fused in plan for fused in ("fms", "fsm", "fma")), (
            f"Fused Arithmetic applied on literal {expr}: {plan}"
        )
224
225
226
def test_literal_no_upcast() -> None:
    """Combining a Float32 column with Python literals keeps the Float32 dtype."""
    col_a = pl.col("a")
    lf = pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Float32)}).lazy()

    result = lf.select(
        (col_a * -5 + 2).alias("fma"),
        (2 - col_a * 5).alias("fsm"),
        (col_a * 5 - 2).alias("fms"),
    ).collect()

    output_dtypes = set(result.schema.values())
    assert output_dtypes == {pl.Float32}, (
        "Literal * Column (Float32) should not lead upcast"
    )
241
242
243
def test_boolean_addition() -> None:
    """Horizontal sums over boolean columns use the index dtype."""
    index_dtype = pl.get_index_type()

    # DataFrame method
    flags = pl.DataFrame({"a": [True, False, False], "b": [True, False, True]})
    row_sums = flags.sum_horizontal()
    assert row_sums.dtype == index_dtype
    assert row_sums.to_list() == [2, 0, 1]

    # expression function
    single_row = pl.DataFrame({"a": [True], "b": [False]})
    summed = single_row.select(pl.sum_horizontal("a", "b"))
    assert summed.dtypes == [index_dtype]
254
255
256
def test_bitwise_6311() -> None:
    """Bitwise-or with integer literals inside two chained when/then/otherwise steps."""
    col1, flag = pl.col("col1"), pl.col("flag")
    df = pl.DataFrame({"col1": [0, 1, 2, 3], "flag": [0, 0, 0, 0]})

    # first pass: set flag b0010 where col1 < 1 or col1 >= 3
    first_pass = df.with_columns(
        pl.when((col1 < 1) | (col1 >= 3)).then(flag | 2).otherwise(flag)
    )
    # second pass: or in 4 wherever col1 > -1
    second_pass = first_pass.with_columns(
        pl.when(col1 > -1).then(flag | 4).otherwise(flag)
    )

    assert second_pass.to_dict(as_series=False) == {
        "col1": [0, 1, 2, 3],
        "flag": [6, 4, 4, 6],
    }
270
271
272
def test_arithmetic_null_count() -> None:
    """Check the null counts of addition results, with and without a scalar operand."""
    col_a, col_b = pl.col("a"), pl.col("b")
    df = pl.DataFrame({"a": [1, None, 2], "b": [None, 2, 1]})

    out = df.select(
        no_broadcast=col_a + col_b,
        broadcast_left=1 + col_b,
        broadcast_right=col_a + 1,
    )

    expected_null_counts = {
        "no_broadcast": [2],
        "broadcast_left": [1],
        "broadcast_right": [1],
    }
    assert out.null_count().to_dict(as_series=False) == expected_null_counts
284
285
286
@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.floordiv,
        operator.mod,
        operator.mul,
        operator.sub,
    ],
)
@pytest.mark.parametrize("dtype", NUMERIC_DTYPES)
def test_operator_arithmetic_with_nulls(op: Any, dtype: pl.DataType) -> None:
    """Arithmetic with a null operand gives all-null output of the same dtype."""
    schema = {"n": dtype}
    df = pl.DataFrame({"n": [2, 3]}, schema=schema)
    df_expected = pl.DataFrame({"n": [None, None]}, schema=schema)
    s = df.to_series()
    s_expected = df_expected.to_series()

    # validate expr, frame, and series behaviour with null value arithmetic
    method_name = op.__name__
    for null_operand in (None, pl.lit(None)):
        # operator form, e.g. `col("n") + None`
        via_operator = df.select(op(pl.col("n"), null_operand))
        assert_frame_equal(df_expected, via_operator)

        # method form, e.g. `col("n").add(None)`
        expr_method = getattr(pl.col("n"), method_name)
        via_method = df.select(expr_method(null_operand))
        assert_frame_equal(df_expected, via_method)

    assert_frame_equal(op(df, None), df_expected)
    assert_series_equal(op(s, None), s_expected)
314
315
316
@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.mod,
        operator.mul,
        operator.sub,
    ],
)
def test_null_column_arithmetic(op: Any) -> None:
    """Arithmetic between all-null columns yields an all-null column named "a"."""
    df = pl.DataFrame({"a": [None, None], "b": [None, None]})
    expected_df = pl.DataFrame({"a": [None, None]})

    operand_pairs = [
        # two full-length columns
        (pl.col("a"), pl.col("b")),
        # test broadcast right
        (pl.col("a"), pl.Series([None])),
        # test broadcast left
        (pl.Series("a", [None]), pl.col("a")),
    ]
    for lhs, rhs in operand_pairs:
        output_df = df.select(op(lhs, rhs))
        assert_frame_equal(expected_df, output_df)
337
338
339
def test_bool_floordiv() -> None:
    """Floor division on a boolean column raises InvalidOperationError."""
    df = pl.DataFrame({"x": [True]})
    expected_message = "floor_div operation not supported for dtype `bool`"

    with pytest.raises(InvalidOperationError, match=expected_message):
        df.with_columns(pl.col("x").floordiv(2))
347
348
349
def test_arithmetic_in_aggregation_3739() -> None:
    """Column-minus-mean arithmetic works inside a group-by aggregation."""
    x, y = pl.col("x"), pl.col("y")
    # sum over the products of the mean-centred columns
    demean_dot = ((x - x.mean()) * (y - y.mean())).sum().alias("demean_dot")

    df = pl.DataFrame(
        {
            "key": ["a", "a", "a", "a"],
            "x": [4, 2, 2, 4],
            "y": [2, 0, 2, 0],
        }
    )
    aggregated = df.group_by("key").agg([demean_dot])

    assert aggregated.to_dict(as_series=False) == {
        "key": ["a"],
        "demean_dot": [0.0],
    }
372
373
374
def test_arithmetic_on_df() -> None:
    """DataFrame arithmetic against scalars, another DataFrame, and a list."""
    df = pl.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})

    # scalar operands; multiplication and addition are also tried reflected
    doubled = {"a": [2.0, 4.0], "b": [6.0, 8.0]}
    plus_two = {"a": [3.0, 4.0], "b": [5.0, 6.0]}
    scalar_cases = [
        (df * 2, doubled),
        (2 * df, doubled),
        (df + 2, plus_two),
        (2 + df, plus_two),
        (df / 2, {"a": [0.5, 1.0], "b": [1.5, 2.0]}),
        (df - 2, {"a": [-1.0, 0.0], "b": [1.0, 2.0]}),
        (df % 2, {"a": [1.0, 0.0], "b": [1.0, 0.0]}),
    ]
    for actual, expected_data in scalar_cases:
        assert_frame_equal(actual, pl.DataFrame(expected_data))

    # frame operand with a single column and a single row: only the first
    # value of "a" is expected to be non-null
    df2 = pl.DataFrame({"c": [10]})
    frame_cases = [
        (operator.add, 11.0),
        (operator.sub, -9.0),
        (operator.truediv, 0.1),
        (operator.mul, 10.0),
        (operator.mod, 1.0),
    ]
    for op, first_value in frame_cases:
        out = op(df, df2)
        expected = pl.DataFrame(
            {"a": [first_value, None], "b": [None, None]}
        ).with_columns(pl.col("b").cast(pl.Float64))
        assert_frame_equal(out, expected)

    # cannot do arithmetic with a sequence
    with pytest.raises(TypeError, match="operation not supported"):
        _ = df + [1]  # type: ignore[operator]
432
433
434
def test_df_series_division() -> None:
    """Divide a DataFrame by a Series with `/` and `//`."""
    df = pl.DataFrame(
        {
            "a": [2, 2, 4, 4, 6, 6],
            "b": [2, 2, 10, 5, 6, 6],
        }
    )
    divisor = pl.Series([2, 2, 2, 2, 2, 2])

    true_div = (df / divisor).to_dict(as_series=False)
    assert true_div == {
        "a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
        "b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0],
    }

    floor_div = (df // divisor).to_dict(as_series=False)
    assert floor_div == {
        "a": [1, 1, 2, 2, 3, 3],
        "b": [1, 1, 5, 2, 3, 3],
    }
450
451
452
@pytest.mark.parametrize(
    "s", [pl.Series([1, 2], dtype=Int64), pl.Series([1, 2], dtype=Float64)]
)
def test_arithmetic_series(s: pl.Series) -> None:
    """Run the binary and unary arithmetic operators on a two-element Series."""
    a = s
    b = s

    # Each result is compared element-wise with the expected list; both
    # elements must match, i.e. the boolean mask must sum to 2.
    binary_cases = [
        # series with series
        (a * b, [1, 4]),
        (a / b, [1.0, 1.0]),
        (a + b, [2, 4]),
        (a - b, [0, 0]),
        # series with scalar on the right
        (a + 1, [2, 3]),
        (a - 1, [0, 1]),
        (a / 1, [1.0, 2.0]),
        (a // 2, [0, 1]),
        (a * 2, [2, 4]),
        # scalar on the left
        (2 + a, [3, 4]),
        (1 - a, [0, -1]),
        (2 * a, [2, 4]),
    ]
    for actual, expected_values in binary_cases:
        assert (actual == expected_values).sum() == 2

    # reflected true division and floor division
    assert_series_equal(1 / a, pl.Series([1.0, 0.5]))
    if s.dtype == Int64:
        expected = pl.Series([1, 0])
    else:
        expected = pl.Series([1.0, 0.5])
    assert_series_equal(1 // a, expected)

    # modulo
    assert ((1 % a) == [0, 1]).sum() == 2
    assert ((a % 1) == [0, 0]).sum() == 2

    # negate
    assert (-a == [-1, -2]).sum() == 2

    # unary plus
    assert (+a == a).all()

    # float scalars on the left-hand side
    float_lhs_cases = [
        (1.0 - a, [0.0, -1.0]),
        (1.0 / a, [1.0, 0.5]),
        (1.0 * a, [1, 2]),
        (1.0 + a, [2, 3]),
        (1.0 % a, [0, 1]),
    ]
    for actual, expected_values in float_lhs_cases:
        assert (actual == expected_values).sum() == 2
489
490
491
def test_arithmetic_datetime() -> None:
    """Multiplicative operators and power between a datetime Series and an int raise."""
    a = pl.Series("a", [datetime(2021, 1, 1)])
    type_error_ops = (
        operator.floordiv,
        operator.truediv,
        operator.mul,
        operator.mod,
    )

    # first with the Series on the left, then with the integer on the left
    for lhs, rhs in ((a, 2), (2, a)):
        for op in type_error_ops:
            with pytest.raises(TypeError):
                op(lhs, rhs)
        # power raises a different error type than the other operators
        with pytest.raises(InvalidOperationError):
            operator.pow(lhs, rhs)
517
518
519
def test_power_series() -> None:
    """Exercise `**` (pow and rpow) and `Series.pow` across dtypes and error cases."""
    ints = pl.Series([1, 2], dtype=Int64)
    floats = pl.Series([None, 2.0], dtype=Float64)
    dates = pl.Series([date(2020, 2, 28), date(2020, 3, 1)], dtype=Date)
    u8_exponent = pl.Series([1, 2], dtype=UInt8)
    u64_small = pl.Series([1, 2], dtype=UInt64)
    u64_large = pl.Series([2**33, 2**33], dtype=UInt64)

    # pow
    assert_series_equal(ints**2, pl.Series([1, 4], dtype=Int64))
    assert_series_equal(floats**3, pl.Series([None, 8.0], dtype=Float64))
    assert_series_equal(ints**ints, pl.Series([1, 4], dtype=Int64))
    assert_series_equal(floats**floats, pl.Series([None, 4.0], dtype=Float64))
    assert_series_equal(ints**floats, pl.Series([None, 4.0], dtype=Float64))

    # with a UInt8 exponent the result is expected in the dtype of the base
    base_dtypes = (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64)
    for base_dtype in base_dtypes:
        base = pl.Series([1, 2], dtype=base_dtype)
        assert_series_equal(base**u8_exponent, pl.Series([1, 4], dtype=base_dtype))

    with pytest.raises(
        InvalidOperationError,
        match="`pow` operation not supported for dtype `null` as exponent",
    ):
        ints ** pl.lit(None)

    with pytest.raises(
        InvalidOperationError,
        match="`pow` operation not supported for dtype `date` as base",
    ):
        dates**2
    with pytest.raises(
        InvalidOperationError,
        match="`pow` operation not supported for dtype `date` as exponent",
    ):
        2**dates

    with pytest.raises(ColumnNotFoundError):
        ints ** "hi"  # type: ignore[operator]

    # Raising to UInt64: raises if can't be downcast safely to UInt32...
    with pytest.raises(
        InvalidOperationError, match="conversion from `u64` to `u32` failed"
    ):
        ints**u64_large
    # ... but succeeds otherwise.
    assert_series_equal(ints**u64_small, pl.Series([1, 4], dtype=Int64))

    # rpow
    assert_series_equal(2.0**ints, pl.Series(None, [2.0, 4.0], dtype=Float64))
    assert_series_equal(2**floats, pl.Series(None, [None, 4.0], dtype=Float64))

    with pytest.raises(ColumnNotFoundError):
        "hi" ** ints

    # Series.pow() method
    assert_series_equal(ints.pow(2), pl.Series([1, 4], dtype=Int64))
585
586
587
def test_rpow_name_20071() -> None:
    """`1 ** Series` keeps the name of the Series operand."""
    named = pl.Series("a", [1, 2])
    assert_series_equal(1**named, pl.Series("a", [1, 1], pl.Int32))
591
592
593
@pytest.mark.parametrize(
    ("expected", "expr", "column_names"),
    [
        (np.array([[2, 4], [6, 8]], dtype=np.int64), lambda x, y: x + y, ("a", "a")),
        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda x, y: x - y, ("a", "a")),
        (np.array([[1, 4], [9, 16]], dtype=np.int64), lambda x, y: x * y, ("a", "a")),
        (
            np.array([[1.0, 1.0], [1.0, 1.0]], dtype=np.float64),
            lambda x, y: x / y,
            ("a", "a"),
        ),
        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda x, y: x % y, ("a", "a")),
        (
            np.array([[3, 4], [7, 8]], dtype=np.int64),
            lambda x, y: x + y,
            ("a", "uint8"),
        ),
        # This fails because the code is buggy, see
        # https://github.com/pola-rs/polars/issues/17820
        #
        # (
        #     np.array([[[2, 4]], [[6, 8]]], dtype=np.int64),
        #     lambda a, b: a + b,
        #     ("nested", "nested"),
        # ),
    ],
)
def test_array_arithmetic_same_size(
    expected: Any,
    expr: Callable[[pl.Series | pl.Expr, pl.Series | pl.Expr], pl.Series],
    column_names: tuple[str, str],
) -> None:
    """Arithmetic on columns built from NumPy arrays, via Expr and via Series."""
    columns = [
        pl.Series("a", np.array([[1, 2], [3, 4]], dtype=np.int64)),
        pl.Series("uint8", np.array([[2, 2], [4, 4]], dtype=np.uint8)),
        pl.Series("nested", np.array([[[1, 2]], [[3, 4]]], dtype=np.int64)),
    ]
    df = pl.DataFrame(columns)
    lhs_name, rhs_name = column_names
    # the result is expected to carry the name of the left operand
    expected_series = pl.Series(lhs_name, expected)

    # Expr-based arithmetic:
    via_expr = df.select(expr(pl.col(lhs_name), pl.col(rhs_name)))
    assert_frame_equal(via_expr, expected_series.to_frame())

    # Direct arithmetic on the Series:
    via_series = expr(df[lhs_name], df[rhs_name])
    assert_series_equal(via_series, expected_series)
642
643
644
def test_schema_owned_arithmetic_5669() -> None:
    """Negating an aliased column in a lazy query yields a column named "B"."""
    lf = pl.LazyFrame({"A": [1, 2, 3]})
    filtered = lf.filter(pl.col("A") >= 3)
    # the unary minus is applied to the already-aliased expression
    df = filtered.with_columns(-pl.col("A").alias("B")).collect()

    assert df.columns == ["A", "B"]
    assert df.rows() == [(3, -3)]
653
654
655
def test_schema_true_divide_6643() -> None:
    """After `a / 2` on an integer column, selecting Int64 columns matches nothing."""
    lf = pl.DataFrame({"a": [1]}).lazy()
    divided = lf.select(pl.col("a") / 2)
    int_columns_only = divided.select(pl.col(pl.Int64)).collect()
    assert int_columns_only.shape == (0, 0)
659
660
661
def test_literal_subtract_schema_13284() -> None:
    """Subtracting `lit(1)` from a UInt8 column keeps UInt8 in the lazy schema."""
    lf = pl.LazyFrame({"a": [23, 30]}, schema={"a": pl.UInt8})
    grouped = lf.with_columns(pl.col("a") - pl.lit(1)).group_by("a").len()

    expected_schema = OrderedDict(
        [("a", pl.UInt8), ("len", pl.get_index_type())]
    )
    assert grouped.collect_schema() == expected_schema
668
669
670
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_int_operator_stability(dtype: pl.DataType) -> None:
    """Integer dtype is kept by `//`, `+`, `-`, `*` with a literal; `/` gives Float64."""
    s = pl.Series(values=[10], dtype=dtype)

    dtype_preserving_ops = (
        operator.floordiv,
        operator.add,
        operator.sub,
        operator.mul,
    )
    for op in dtype_preserving_ops:
        assert pl.select(op(pl.lit(s), 2)).dtypes == [dtype]

    # true division is the one operator expected to change the dtype
    assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]
678
679
680
def test_duration_division_schema() -> None:
    """Duration divided by Duration is Float64, in the lazy schema and the result."""
    col_a = pl.col("a")
    lf = pl.DataFrame({"a": [1]}).lazy()
    q = lf.with_columns(col_a.cast(pl.Duration)).select(col_a / col_a)

    assert q.collect_schema() == {"a": pl.Float64}
    assert q.collect().to_dict(as_series=False) == {"a": [1.0]}
690
691
692
@pytest.mark.parametrize(
    ("a", "b", "op"),
    [
        (pl.Duration, pl.Int32, "+"),
        (pl.Int32, pl.Duration, "+"),
        (pl.Time, pl.Int32, "+"),
        (pl.Int32, pl.Time, "+"),
        (pl.Date, pl.Int32, "+"),
        (pl.Int32, pl.Date, "+"),
        (pl.Datetime, pl.Duration, "*"),
        (pl.Duration, pl.Datetime, "*"),
        (pl.Date, pl.Duration, "*"),
        (pl.Duration, pl.Date, "*"),
        (pl.Time, pl.Duration, "*"),
        (pl.Duration, pl.Time, "*"),
    ],
)
def test_raise_invalid_temporal(a: pl.DataType, b: pl.DataType, op: str) -> None:
    """The listed temporal/integer dtype pairs raise for the given operator.

    Parameters
    ----------
    a, b
        Dtypes of the left and right operand columns.
    op
        Operator symbol, either "+" or "*".
    """
    # Map the symbol to a callable instead of building source code for `eval`.
    binary_ops = {"+": operator.add, "*": operator.mul}

    # Empty columns are enough: only the dtype combination matters here.
    # Fresh local names avoid rebinding the dtype-typed parameters.
    left = pl.Series("a", [], dtype=a)
    right = pl.Series("b", [], dtype=b)
    frame = pl.DataFrame([left, right])

    with pytest.raises(InvalidOperationError):
        frame.select(binary_ops[op](pl.col("a"), pl.col("b")))
716
717
718
def test_arithmetic_duration_div_multiply() -> None:
    """Duration scaled by int/float literals stays Duration; Duration / Duration is Float64."""

    def micros(*values: int) -> list[timedelta]:
        # shorthand for a list of microsecond timedeltas
        return [timedelta(microseconds=v) for v in values]

    duration_us = pl.Duration(time_unit="us")
    col_a = pl.col("a")
    df = pl.DataFrame([pl.Series("a", [100, 200, 3000], dtype=pl.Duration)])

    # column on the left-hand side
    q = df.lazy().with_columns(
        b=col_a / 2,
        c=col_a / 2.5,
        d=col_a * 2,
        e=col_a * 2.5,
        f=col_a / col_a,  # a constant float
    )
    expected_schema = pl.Schema(
        [(name, duration_us) for name in ("a", "b", "c", "d", "e")]
        + [("f", pl.Float64())]
    )
    assert q.collect_schema() == expected_schema
    assert q.collect().to_dict(as_series=False) == {
        "a": micros(100, 200, 3000),
        "b": micros(50, 100, 1500),
        "c": micros(40, 80, 1200),
        "d": micros(200, 400, 6000),
        "e": micros(250, 500, 7500),
        "f": [1.0, 1.0, 1.0],
    }

    # rhs

    q = df.lazy().with_columns(
        b=2 * col_a,
        c=2.5 * col_a,
    )
    expected_schema = pl.Schema([(name, duration_us) for name in ("a", "b", "c")])
    assert q.collect_schema() == expected_schema
    assert q.collect().to_dict(as_series=False) == {
        "a": micros(100, 200, 3000),
        "b": micros(200, 400, 6000),
        "c": micros(250, 500, 7500),
    }
797
798
799
def test_invalid_shapes_err() -> None:
    """Adding Series of length 2 and 3 raises with a message naming both lengths."""
    shorter = pl.Series([1, 2])
    longer = pl.Series([1, 2, 3])
    expected_message = (
        r"cannot do arithmetic operation on series of different lengths: got 2 and 3"
    )

    with pytest.raises(InvalidOperationError, match=expected_message):
        shorter + longer
805
806
807
def test_date_datetime_sub() -> None:
    """Subtract a Date column and a Datetime column from each other."""
    foo, bar = pl.col("foo"), pl.col("bar")
    df = pl.DataFrame({"foo": [date(2020, 1, 1)], "bar": [datetime(2020, 1, 5)]})

    differences = df.select(foo - bar, bar - foo).to_dict(as_series=False)

    assert differences == {
        "foo": [timedelta(days=-4)],
        "bar": [timedelta(days=4)],
    }
817
818
819
def test_time_time_sub() -> None:
    """Subtract the time-of-day parts of two Datetime columns from each other."""
    foo_time = pl.col("foo").dt.time()
    bar_time = pl.col("bar").dt.time()
    df = pl.DataFrame(
        {
            "foo": pl.Series([-1, 0, 10]).cast(pl.Datetime("us")),
            "bar": pl.Series([1, 0, 1]).cast(pl.Datetime("us")),
        }
    )

    out = df.select(foo_time - bar_time, bar_time - foo_time)

    expected = {
        "foo": [
            timedelta(days=1, microseconds=-2),
            timedelta(0),
            timedelta(microseconds=9),
        ],
        "bar": [
            timedelta(days=-1, microseconds=2),
            timedelta(0),
            timedelta(microseconds=-9),
        ],
    }
    assert out.to_dict(as_series=False) == expected
842
843
844
def test_raise_invalid_shape() -> None:
    """Multiplying DataFrames of incompatible shapes raises InvalidOperationError."""
    two_columns = pl.DataFrame([[1, 2], [3, 4]])
    other = pl.DataFrame([1, 2, 3])
    with pytest.raises(InvalidOperationError):
        two_columns * other
847
848
849
def test_integer_divide_scalar_zero_lhs_19142() -> None:
    """A length-1 zero on the left of `//` and `%` gives null where the divisor is 0."""
    for op in (operator.floordiv, operator.mod):
        result = op(pl.Series([0]), pl.Series([1, 0]))
        assert_series_equal(result, pl.Series([0, None]))
852
853
854
def test_compound_duration_21389() -> None:
    """Datetime plus/minus a scaled Duration resolves to Datetime("us")."""
    lf = pl.LazyFrame(
        {
            "ts": datetime(2024, 1, 1, 1, 2, 3),
            "duration": timedelta(days=1),
        }
    )
    expected_schema = pl.Schema({"ts": pl.Datetime(time_unit="us", time_zone=None)})

    cases = [
        # test add
        (operator.add, datetime(2024, 1, 3, 1, 2, 3)),
        # test subtract
        (operator.sub, datetime(2023, 12, 30, 1, 2, 3)),
    ]
    for op, expected_ts in cases:
        result = lf.select(op(pl.col("ts"), pl.col("duration") * 2))
        expected = pl.DataFrame({"ts": expected_ts})
        assert result.collect_schema() == expected_schema
        assert_frame_equal(result.collect(), expected)
874
875
876
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_arithmetic_i128(dtype: PolarsIntegerType) -> None:
    """Adding any integer dtype to Int128 gives Int128, in either operand order."""
    values = [0, 1, 127]
    narrow = pl.Series("a", values, dtype=dtype, strict=False)
    zeros_i128 = pl.Series("a", [0, 0, 0], dtype=pl.Int128)
    expected = pl.Series("a", values, dtype=pl.Int128)

    for result in (narrow + zeros_i128, zeros_i128 + narrow):
        assert_series_equal(result, expected)
883
884
885
def test_arithmetic_i128_nonint() -> None:
    """Add Int128 to Float32, Float64 and Boolean Series in both operand orders."""
    s128 = pl.Series("a", [0], dtype=pl.Int128)

    # (other operand, expected sum)
    cases = [
        (
            pl.Series("a", [1.0], dtype=pl.Float32),
            pl.Series("a", [1.0], dtype=pl.Float64),
        ),
        (
            pl.Series("a", [1.0], dtype=pl.Float64),
            pl.Series("a", [1.0], dtype=pl.Float64),
        ),
        (
            pl.Series("a", [True], dtype=pl.Boolean),
            pl.Series("a", [1], dtype=pl.Int128),
        ),
    ]
    for other, expected in cases:
        assert_series_equal(other + s128, expected)
        assert_series_equal(s128 + other, expected)
899
900
901
@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
def test_arithmetic_u128(dtype: PolarsIntegerType) -> None:
    """UInt128 plus unsigned stays UInt128; UInt128 plus any other integer dtype is Int128."""
    values = [0, 1, 127]
    narrow = pl.Series("a", values, dtype=dtype, strict=False)
    zeros_u128 = pl.Series("a", [0, 0, 0], dtype=pl.UInt128)

    if dtype in UNSIGNED_INTEGER_DTYPES:
        expected_dtype = pl.UInt128
    else:
        expected_dtype = pl.Int128
    expected = pl.Series("a", values, dtype=expected_dtype)

    for result in (narrow + zeros_u128, zeros_u128 + narrow):
        assert_series_equal(result, expected)
909
910
911
def test_arithmetic_u128_nonint() -> None:
    """Add UInt128 to Float32, Float64 and Boolean Series in both operand orders."""
    s128 = pl.Series("a", [0], dtype=pl.UInt128)

    # (other operand, expected sum)
    cases = [
        (
            pl.Series("a", [1.0], dtype=pl.Float32),
            pl.Series("a", [1.0], dtype=pl.Float64),
        ),
        (
            pl.Series("a", [1.0], dtype=pl.Float64),
            pl.Series("a", [1.0], dtype=pl.Float64),
        ),
        (
            pl.Series("a", [True], dtype=pl.Boolean),
            pl.Series("a", [1], dtype=pl.UInt128),
        ),
    ]
    for other, expected in cases:
        assert_series_equal(other + s128, expected)
        assert_series_equal(s128 + other, expected)
925
926
927
def test_float_truediv_output_type() -> None:
    """True division stays Float32 only when both operands are Float32."""
    lf = pl.LazyFrame(schema={"f32": pl.Float32, "f64": pl.Float64})

    # (numerator column, denominator column, expected output dtype)
    cases = [
        ("f32", "f32", pl.Float32),
        ("f32", "f64", pl.Float64),
        ("f64", "f32", pl.Float64),
        ("f64", "f64", pl.Float64),
    ]
    for numerator, denominator, out_dtype in cases:
        quotient = lf.select(x=pl.col(numerator) / pl.col(denominator))
        assert quotient.collect_schema() == pl.Schema({"x": out_dtype})
941
942
943
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float64,
        pl.Int32,
        pl.Decimal(21, 3),
    ],
)
def test_log_exp(dtype: pl.DataType) -> None:
    """Compare log10/log/exp/log1p with NumPy computed on Float64 casts."""
    powers_of_ten = pl.Series("a", [1, 100, 1000], dtype=dtype)
    exponents = pl.Series("a", [0, 2, 3], dtype=dtype)
    df = pl.DataFrame({"a": powers_of_ten, "b": exponents})

    col_a, col_b = pl.col("a"), pl.col("b")
    result = df.lazy().select(
        log10=col_a.log10(),
        log=col_a.log(),
        exp=col_b.exp(),
        log1p=col_a.log1p(),
    )

    # reference values computed by NumPy on the Float64 casts
    a_f64 = df["a"].cast(pl.Float64).to_numpy()
    b_f64 = df["b"].cast(pl.Float64).to_numpy()
    expected = df.select(
        # column "b" already holds the base-10 logarithms of column "a"
        log10=col_b.cast(pl.Float64),
        log=pl.Series(np.log(a_f64)),
        exp=pl.Series(np.exp(b_f64)),
        log1p=pl.Series(np.log1p(a_f64)),
    )

    assert_frame_equal(result.collect(), expected)
    assert result.collect_schema() == expected.schema
974
975
976
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float64,
        pl.Float32,
    ],
)
def test_log_broadcast(dtype: pl.DataType) -> None:
    """`Series.log` with a Series base: equal lengths and length-1 on either side."""

    def series(values: list[int]) -> pl.Series:
        # every Series in this test is named "a" and uses the parametrized dtype
        return pl.Series("a", values, dtype=dtype)

    a = series([1, 3, 9, 27, 81])
    b = series([3, 3, 9, 3, 9])

    # element-wise bases
    assert_series_equal(a.log(b), series([0, 1, 1, 3, 2]))
    # length-1 base
    assert_series_equal(a.log(series([3])), series([0, 1, 2, 3, 4]))
    # length-1 value against a full-length base
    assert_series_equal(series([81]).log(b), series([4, 4, 2, 4, 2]))
996
997