CoCalc -- test_horizontal.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/aggregation/test_horizontal.py
6940 views
1
from __future__ import annotations

import datetime
from collections import OrderedDict
from typing import TYPE_CHECKING, Any

import pytest

import polars as pl
import polars.selectors as cs
from polars.exceptions import ComputeError, PolarsError
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
    from collections.abc import Callable

    from polars._typing import PolarsDataType
16

17

18
def test_any_expr(fruits_cars: pl.DataFrame) -> None:
    """The first row of ``any_horizontal("A", "B")`` must be exactly ``True``."""
    first_value = fruits_cars.select(pl.any_horizontal("A", "B")).to_series()[0]
    assert first_value is True
20

21

22
def test_all_any_horizontally() -> None:
    """Check any/all_horizontal with nulls, plus the kwargs-filter plan reduction."""
    rows = [
        [False, False, True],
        [False, False, True],
        [True, False, False],
        [False, None, True],
        [None, None, False],
    ]
    df = pl.DataFrame(rows, schema=["var1", "var2", "var3"], orient="row")

    var2, var3 = pl.col("var2"), pl.col("var3")
    result = df.select(
        any=pl.any_horizontal(var2, var3),
        all=pl.all_horizontal(var2, var3),
    )
    expected = pl.DataFrame(
        {
            "any": [True, True, False, True, None],
            "all": [False, False, False, None, False],
        }
    )
    assert_frame_equal(result, expected)

    # note: a kwargs filter will use an internal call to all_horizontal
    filtered = df.lazy().filter(var1=True, var3=False)
    assert filtered.collect().rows() == [(True, False, False)]

    # the horizontal filter components should have been reduced away, so the
    # explained plan must not mention an "all_horizontal" node
    plan = filtered.explain().lower()
    assert "horizontal" not in plan
53

54

55
def test_empty_all_any_horizontally() -> None:
    """A selector matching no columns gives ``False`` for any and ``True`` for all."""
    # any/all_horizontal reject an explicitly empty argument list, but a selector
    # that expands to zero columns still reaches them with empty input.
    df = pl.DataFrame({"x": [1, 2, 3]})
    no_columns = cs.string().is_null()

    any_result = df.select(pl.any_horizontal(no_columns))
    assert_frame_equal(any_result, pl.DataFrame({"any_horizontal": False}))

    all_result = df.select(pl.all_horizontal(no_columns))
    assert_frame_equal(all_result, pl.DataFrame({"all_horizontal": True}))
67

68

69
def test_all_any_single_input() -> None:
    """With one integer column, any/all_horizontal both yield its truthiness."""
    df = pl.DataFrame({"a": [0, 1, None]})
    col_a = pl.col("a")

    out = df.select(all=pl.all_horizontal(col_a), any=pl.any_horizontal(col_a))

    # 0 -> False, 1 -> True, null stays null for both reductions
    truthiness = [False, True, None]
    expected = pl.DataFrame({"all": truthiness, "any": truthiness})
    assert_frame_equal(out, expected)
82

83

84
def test_all_any_accept_expr() -> None:
    """any/all_horizontal accept a multi-column expression such as ``pl.all()``."""
    lf = pl.LazyFrame({"a": [1, None, 2, None], "b": [1, 2, None, None]})
    is_null = pl.all().is_null()

    result = lf.select(
        pl.any_horizontal(is_null).alias("null_in_row"),
        pl.all_horizontal(is_null).alias("all_null_in_row"),
    )

    expected = pl.LazyFrame(
        {
            "null_in_row": [False, True, True, True],
            "all_null_in_row": [False, False, False, True],
        }
    )
    assert_frame_equal(result, expected)
104

105

106
def test_max_min_multiple_columns(fruits_cars: pl.DataFrame) -> None:
    """Row-wise max and min across the fixture's "A" and "B" columns."""
    row_max = fruits_cars.select(max=pl.max_horizontal("A", "B")).to_series()
    assert_series_equal(row_max, pl.Series("max", [5, 4, 3, 4, 5]))

    row_min = fruits_cars.select(min=pl.min_horizontal("A", "B")).to_series()
    assert_series_equal(row_min, pl.Series("min", [1, 2, 3, 2, 1]))
114

115

116
def test_max_min_nulls_consistency() -> None:
    """Nulls are skipped by both max_horizontal and min_horizontal."""
    df = pl.DataFrame({"a": [None, 2, 3], "b": [4, None, 6], "c": [7, 5, 0]})
    columns = ("a", "b", "c")

    row_max = df.select(max=pl.max_horizontal(*columns)).to_series()
    assert_series_equal(row_max, pl.Series("max", [7, 5, 6]))

    row_min = df.select(min=pl.min_horizontal(*columns)).to_series()
    assert_series_equal(row_min, pl.Series("min", [4, 2, 0]))
126

127

128
def test_nested_min_max() -> None:
    """max_horizontal can take min_horizontal expressions as its inputs."""
    data = {"a": [1], "b": [2], "c": [3], "d": [4]}
    df = pl.DataFrame(data)

    lower_ab = pl.min_horizontal("a", "b")
    lower_cd = pl.min_horizontal("c", "d")
    result = df.with_columns(pl.max_horizontal(lower_ab, lower_cd).alias("t"))

    # max(min(1, 2), min(3, 4)) == 3
    expected = pl.DataFrame({**data, "t": [3]})
    assert_frame_equal(result, expected)
139

140

141
def test_empty_inputs_raise() -> None:
    """Calling any/all_horizontal with no arguments raises a ComputeError."""
    message = "cannot return empty fold because the number of output rows is unknown"

    for horizontal in (pl.any_horizontal, pl.all_horizontal):
        with pytest.raises(ComputeError, match=message):
            pl.select(horizontal())
153

154

155
def test_max_min_wildcard_columns(fruits_cars: pl.DataFrame) -> None:
    """Wildcard inputs ("*" and ``pl.all()``) expand to every Int64 column."""
    int_columns = fruits_cars.select(pl.col(pl.datatypes.Int64))

    expected_min = pl.Series("min", [1, 2, 3, 2, 1])
    for wildcard in ("*", pl.all()):
        result = int_columns.select(min=pl.min_horizontal(wildcard))
        assert_series_equal(result.to_series(), expected_min)

    expected_max = pl.Series("max", [5, 4, 3, 4, 5])
    # the last case repeats columns through overlapping wildcards; the
    # maximum is expected to be unaffected by the duplicates
    for arguments in (("*",), (pl.all(),), (pl.all(), "A", "*")):
        result = int_columns.select(max=pl.max_horizontal(*arguments))
        assert_series_equal(result.to_series(), expected_max)
182

183

184
@pytest.mark.parametrize(
    ("input", "expected_data"),
    [
        (pl.col("^a|b$"), [1, 2]),
        (pl.col("a", "b"), [1, 2]),
        (pl.col("a"), [1, 4]),
        (pl.lit(5, dtype=pl.Int64), [5]),
        (5.0, [5.0]),
    ],
)
def test_min_horizontal_single_input(input: Any, expected_data: list[Any]) -> None:
    """min_horizontal with one positional argument (expression or literal)."""
    frame = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    expected = pl.Series("min", expected_data)

    result = frame.select(min=pl.min_horizontal(input)).to_series()

    assert_series_equal(result, expected)
199

200

201
@pytest.mark.parametrize(
    ("inputs", "expected_data"),
    [
        (["a", "b"], [1, 2]),
        (("a", "b"), [1, 2]),
        (("a", 3), [1, 3]),
    ],
)
def test_min_horizontal_multi_input(
    inputs: tuple[Any, ...], expected_data: list[Any]
) -> None:
    """min_horizontal with several positional arguments unpacked from ``inputs``."""
    frame = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    expected = pl.DataFrame({"min": expected_data})

    result = frame.select(min=pl.min_horizontal(*inputs))

    assert_frame_equal(result, expected)
216

217

218
@pytest.mark.parametrize(
    ("input", "expected_data"),
    [
        (pl.col("^a|b$"), [3, 4]),
        (pl.col("a", "b"), [3, 4]),
        (pl.col("a"), [1, 4]),
        (pl.lit(5, dtype=pl.Int64), [5]),
        (5.0, [5.0]),
    ],
)
def test_max_horizontal_single_input(input: Any, expected_data: list[Any]) -> None:
    """max_horizontal with one positional argument (expression or literal)."""
    frame = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    expected = pl.DataFrame({"max": expected_data})

    result = frame.select(max=pl.max_horizontal(input))

    assert_frame_equal(result, expected)
233

234

235
@pytest.mark.parametrize(
    ("inputs", "expected_data"),
    [
        (["a", "b"], [3, 4]),
        (("a", "b"), [3, 4]),
        (("a", 3), [3, 4]),
    ],
)
def test_max_horizontal_multi_input(
    inputs: tuple[Any, ...], expected_data: list[Any]
) -> None:
    """max_horizontal with several positional arguments unpacked from ``inputs``."""
    frame = pl.DataFrame({"a": [1, 4], "b": [3, 2]})
    expected = pl.DataFrame({"max": expected_data})

    result = frame.select(max=pl.max_horizontal(*inputs))

    assert_frame_equal(result, expected)
250

251

252
def test_expanding_sum() -> None:
    """sum_horizontal over a regex column selection sums the matching columns."""
    df = pl.DataFrame(
        {
            "x": [0, 1, 2],
            "y_1": [1.1, 2.2, 3.3],
            "y_2": [1.0, 2.5, 3.5],
        }
    )

    # the pattern matches "y_1" and "y_2" but not "x"
    y_sum = pl.sum_horizontal(pl.col(r"^y_.*$")).alias("y_sum")
    result = df.with_columns(y_sum).get_column("y_sum")

    assert result.to_list() == [2.1, 4.7, 6.8]
265

266

267
def test_sum_max_min() -> None:
    """Horizontal sum/max/min over an int column and a float expression."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    b_squared = pl.col("b") ** 2

    out = df.select(
        sum=pl.sum_horizontal("a", "b"),
        max=pl.max_horizontal("a", b_squared),
        min=pl.min_horizontal("a", b_squared),
    )

    expected_by_name = {
        "sum": [2.0, 4.0, 6.0],
        "max": [1.0, 4.0, 9.0],
        "min": [1.0, 2.0, 3.0],
    }
    for name, values in expected_by_name.items():
        assert_series_equal(out[name], pl.Series(name, values))
277

278

279
def test_str_sum_horizontal() -> None:
    """sum_horizontal on string columns concatenates, treating nulls as empty."""
    data = {
        "A": ["a", "b", None, "c", None],
        "B": ["f", "g", "h", None, None],
    }
    out = pl.DataFrame(data).select(pl.sum_horizontal("A", "B"))

    expected = pl.Series("A", ["af", "bg", "h", "c", ""])
    assert_series_equal(out["A"], expected)
285

286

287
def test_sum_null_dtype() -> None:
    """sum_horizontal involving an all-null column alongside integer columns."""
    df = pl.DataFrame(
        {
            "A": [5, None, 3, 2, 1],
            "B": [5, 3, None, 2, 1],
            "C": [None, None, None, None, None],
        }
    )

    # (columns to sum, expected series named after the first column)
    cases = [
        (("A", "B", "C"), pl.Series("A", [10, 3, 3, 4, 2])),
        (("C", "B"), pl.Series("C", [5, 3, 0, 2, 1])),
        (("C", "C"), pl.Series("C", [None, None, None, None, None])),
    ]
    for columns, expected in cases:
        result = df.select(pl.sum_horizontal(*columns)).to_series()
        assert_series_equal(result, expected)
308

309

310
def test_sum_single_col() -> None:
    """sum_horizontal over a single column replaces nulls with zero."""
    df = pl.DataFrame({"A": [5, None, 3, None, 1]})

    result = df.select(pl.sum_horizontal("A")).to_series()

    assert_series_equal(result, pl.Series("A", [5, 0, 3, 0, 1]))
320

321

322
@pytest.mark.parametrize("ignore_nulls", [False, True])
def test_sum_correct_supertype(ignore_nulls: bool) -> None:
    """Summing a null column with numeric columns yields the numeric supertype."""
    single: list[Any] = [1, 2] if ignore_nulls else [None, None]
    combined: list[Any] = [2, 4] if ignore_nulls else [None, None]
    lf = pl.LazyFrame(
        {
            "null": [None, None],
            "int": pl.Series(single, dtype=pl.Int32),
            "float": pl.Series(single, dtype=pl.Float32),
        }
    )

    def check(
        columns: tuple[str, ...], values: list[Any], dtype: PolarsDataType
    ) -> None:
        # compare both the collected data and the lazily resolved schema
        out = lf.select(pl.sum_horizontal(*columns, ignore_nulls=ignore_nulls))
        expected = pl.LazyFrame({"null": pl.Series(values, dtype=dtype)})
        assert_frame_equal(out.collect(), expected.collect())
        assert out.collect_schema() == expected.collect_schema()

    # null + int32 should produce int32
    check(("null", "int"), single, pl.Int32)

    # null + float32 should produce float32
    check(("null", "float"), single, pl.Float32)

    # null + int32 + float32 should produce float64
    check(("null", "int", "float"), combined, pl.Float64)
353

354

355
def test_cum_sum_horizontal() -> None:
    """cum_sum_horizontal returns a struct of running sums; lazy schema matches."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    running_sum = pl.cum_sum_horizontal("a", "c")

    result = df.select(running_sum)
    expected = pl.DataFrame({"cum_sum": [{"a": 1, "c": 6}, {"a": 2, "c": 8}]})
    assert_frame_equal(result, expected)

    # the schema resolved lazily must agree with the materialised schema
    query = df.lazy().select(running_sum)
    assert query.collect_schema() == query.collect().schema
369

370

371
def test_sum_dtype_12028() -> None:
    """sum_horizontal over a single duration keeps the microsecond Duration dtype."""
    ten_seconds = pl.sum_horizontal([pl.duration(seconds=10)])
    result = pl.select(ten_seconds.alias("sum_duration"))

    expected_column = pl.Series(
        "sum_duration",
        [datetime.timedelta(seconds=10)],
        dtype=pl.Duration(time_unit="us"),
    )
    expected = pl.DataFrame([expected_column])
    assert_frame_equal(expected, result)
385

386

387
def test_horizontal_expr_use_left_name() -> None:
    """Horizontal expressions name their output after the leftmost input."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})

    # (expression, expected output column name)
    cases = [
        (pl.sum_horizontal("a", "b"), "a"),
        (pl.max_horizontal("*"), "a"),
        (pl.min_horizontal("b", "a"), "b"),
        (pl.any_horizontal("b", "a"), "b"),
        (pl.all_horizontal("a", "b"), "a"),
    ]
    for expr, leftmost in cases:
        assert df.select(expr).columns == [leftmost]
400

401

402
def test_horizontal_broadcasting() -> None:
    """Scalar literals are broadcast against columns in horizontal functions."""
    df = pl.DataFrame({"a": [1, 3], "b": [3, 6]})

    total = df.select(sum=pl.sum_horizontal(1, "a", "b")).to_series()
    assert_series_equal(total, pl.Series("sum", [5, 10]))

    mean = df.select(mean=pl.mean_horizontal(1, "a", "b")).to_series()
    assert_series_equal(mean, pl.Series("mean", [1.66666, 3.33333]))

    largest = df.select(max=pl.max_horizontal(4, "*")).to_series()
    assert_series_equal(largest, pl.Series("max", [4, 6]))

    smallest = df.select(min=pl.min_horizontal(2, "b", "a")).to_series()
    assert_series_equal(smallest, pl.Series("min", [1, 2]))

    any_true = df.select(
        any=pl.any_horizontal(False, pl.Series([True, False]))
    ).to_series()
    assert_series_equal(any_true, pl.Series("any", [True, False]))

    all_true = df.select(
        all=pl.all_horizontal(True, pl.Series([True, False]))
    ).to_series()
    assert_series_equal(all_true, pl.Series("all", [True, False]))
433

434

435
def test_mean_horizontal() -> None:
    """mean_horizontal averages the non-null values of each row as Float64."""
    data = {"a": [1, 2, 3], "b": [2.0, 4.0, 6.0], "c": [3, None, 9]}
    result = pl.LazyFrame(data).select(pl.mean_horizontal(pl.all()).alias("mean"))

    expected = pl.LazyFrame({"mean": [2.0, 3.0, 6.0]}, schema={"mean": pl.Float64})
    assert_frame_equal(result, expected)
441

442

443
def test_mean_horizontal_bool() -> None:
    """mean_horizontal over boolean columns treats True/False as 1/0."""
    df = pl.DataFrame(
        {
            "a": [True, False, False],
            "b": [None, True, False],
            "c": [True, False, False],
        }
    )

    result = df.select(mean=pl.mean_horizontal(pl.all()))

    # row 0 ignores the null in "b": (1 + 1) / 2
    expected = pl.DataFrame({"mean": [1.0, 1 / 3, 0.0]}, schema={"mean": pl.Float64})
    assert_frame_equal(result, expected)
454

455

456
def test_mean_horizontal_no_columns() -> None:
    """mean_horizontal without any inputs raises a ComputeError."""
    data = {"a": [1, 2, 3], "b": [2.0, 4.0, 6.0], "c": [3, None, 9]}
    lf = pl.LazyFrame(data)

    with pytest.raises(ComputeError, match="number of output rows is unknown"):
        lf.select(pl.mean_horizontal())
461

462

463
def test_mean_horizontal_no_rows() -> None:
    """mean_horizontal on an empty Int64 frame gives an empty Float64 column."""
    empty = pl.LazyFrame({"a": [], "b": [], "c": []})
    lf = empty.with_columns(pl.all().cast(pl.Int64))

    result = lf.select(pl.mean_horizontal(pl.all()))

    expected = pl.LazyFrame({"a": []}, schema={"a": pl.Float64})
    assert_frame_equal(result, expected)
470

471

472
def test_mean_horizontal_all_null() -> None:
    """A row consisting only of nulls produces a null mean."""
    data = {"a": [1, None], "b": [2, None], "c": [None, None]}
    result = pl.LazyFrame(data).select(pl.mean_horizontal(pl.all()))

    expected = pl.LazyFrame({"a": [1.5, None]}, schema={"a": pl.Float64})
    assert_frame_equal(result, expected)
479

480

481
@pytest.mark.parametrize(
    ("in_dtype", "out_dtype"),
    [
        (pl.Boolean, pl.Float64),
        (pl.UInt8, pl.Float64),
        (pl.UInt16, pl.Float64),
        (pl.UInt32, pl.Float64),
        (pl.UInt64, pl.Float64),
        (pl.Int8, pl.Float64),
        (pl.Int16, pl.Float64),
        (pl.Int32, pl.Float64),
        (pl.Int64, pl.Float64),
        (pl.Float32, pl.Float32),
        (pl.Float64, pl.Float64),
    ],
)
def test_schema_mean_horizontal_single_column(
    in_dtype: PolarsDataType,
    out_dtype: PolarsDataType,
) -> None:
    """The lazy schema of mean_horizontal over one column has the expected dtype."""
    source = pl.LazyFrame({"a": pl.Series([1, 0]).cast(in_dtype)})
    schema = source.select(pl.mean_horizontal(pl.all())).collect_schema()

    assert schema == OrderedDict([("a", out_dtype)])
506

507

508
def test_schema_boolean_sum_horizontal() -> None:
    """The lazy schema of sum_horizontal over a boolean column is the index dtype."""
    lf = pl.LazyFrame({"a": [True, False]}).select(pl.sum_horizontal("a"))

    # Use the configured index type rather than a hard-coded UInt32, matching
    # the other boolean-sum tests in this module, so the expectation does not
    # depend on the index width of the build under test.
    assert lf.collect_schema() == OrderedDict([("a", pl.get_index_type())])
511

512

513
def test_fold_all_schema() -> None:
    """sum_horizontal over hashed columns of mixed dtypes yields a UInt64 column."""
    df = pl.DataFrame(
        {
            "A": [1, 2, 3, 4, 5],
            "fruits": ["banana", "banana", "apple", "apple", "banana"],
            "B": [5, 4, 3, 2, 1],
            "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
            "optional": [28, 300, None, 2, -30],
        }
    )

    # divide because of overflow
    scaled_hashes = pl.all().hash(seed=1) // int(1e8)
    result = df.select(pl.sum_horizontal(scaled_hashes))

    assert result.dtypes == [pl.UInt64]
526

527

528
@pytest.mark.parametrize(
    "horizontal_func",
    [
        pl.all_horizontal,
        pl.any_horizontal,
        pl.max_horizontal,
        pl.min_horizontal,
        pl.mean_horizontal,
        pl.sum_horizontal,
    ],
)
def test_expected_horizontal_dtype_errors(
    horizontal_func: Callable[..., pl.Expr],
) -> None:
    """Every horizontal function rejects a mix of incompatible column dtypes.

    The frame combines decimal, list, string and integer columns; passing all
    of them to ``horizontal_func`` must raise a ``PolarsError`` on ``select``.

    ``horizontal_func`` is one of the module-level ``pl.*_horizontal``
    functions, i.e. a callable returning an expression (not an ``Expr``
    subclass), which is why it is annotated as ``Callable[..., pl.Expr]``.
    """
    from decimal import Decimal as D

    df = pl.DataFrame(
        {
            "cola": [D("1.5"), D("0.5"), D("5"), D("0"), D("-0.25")],
            "colb": [[0, 1], [2], [3, 4], [5], [6]],
            "colc": ["aa", "bb", "cc", "dd", "ee"],
            "cold": ["bb", "cc", "dd", "ee", "ff"],
            "cole": [1000, 2000, 3000, 4000, 5000],
        }
    )
    with pytest.raises(PolarsError):
        df.select(
            horizontal_func(
                pl.col("cola"),
                pl.col("colb"),
                pl.col("colc"),
                pl.col("cold"),
                pl.col("cole"),
            )
        )
563

564

565
def test_horizontal_sum_boolean_with_null() -> None:
    """Summing a boolean and a null column gives the index dtype in either order."""
    idx_dtype = pl.get_index_type()
    lf = pl.LazyFrame({"null": [None, None], "bool": [True, False]})

    out = lf.select(
        pl.sum_horizontal("null", "bool").alias("null_first"),
        pl.sum_horizontal("bool", "null").alias("bool_first"),
    )

    expected_schema = pl.Schema({"null_first": idx_dtype, "bool_first": idx_dtype})
    assert out.collect_schema() == expected_schema

    expected_df = pl.DataFrame(
        {
            "null_first": pl.Series([1, 0], dtype=idx_dtype),
            "bool_first": pl.Series([1, 0], dtype=idx_dtype),
        }
    )
    assert_frame_equal(out.collect(), expected_df)
595

596

597
@pytest.mark.parametrize("ignore_nulls", [True, False])
@pytest.mark.parametrize(
    ("dtype_in", "dtype_out"),
    [
        (pl.Null, pl.Null),
        (pl.Boolean, pl.get_index_type()),
        (pl.UInt8, pl.UInt8),
        (pl.Float32, pl.Float32),
        (pl.Float64, pl.Float64),
        (pl.Decimal(None, 5), pl.Decimal(None, 5)),
    ],
)
def test_horizontal_sum_with_null_col_ignore_strategy(
    dtype_in: PolarsDataType,
    dtype_out: PolarsDataType,
    ignore_nulls: bool,
) -> None:
    """sum_horizontal with a null column, under both null-handling strategies."""
    columns = {
        "null": [None, None, None],
        "s": pl.Series([1, 0, 1], dtype=dtype_in, strict=False),
        "s2": pl.Series([1, 0, None], dtype=dtype_in, strict=False),
    }
    result = pl.LazyFrame(columns).select(
        pl.sum_horizontal("null", "s", "s2", ignore_nulls=ignore_nulls)
    )

    # all-null output unless nulls are ignored and the inputs carry real values
    values: list[Any] = [None, None, None]
    if ignore_nulls and dtype_in != pl.Null:
        values = [2, 0, 1]

    expected = pl.LazyFrame(pl.Series("null", values, dtype=dtype_out))
    assert_frame_equal(result, expected)
    assert result.collect_schema() == expected.collect_schema()
629

630

631
@pytest.mark.parametrize("ignore_nulls", [True, False])
@pytest.mark.parametrize(
    ("dtype_in", "dtype_out"),
    [
        (pl.Null, pl.Float64),
        (pl.Boolean, pl.Float64),
        (pl.UInt8, pl.Float64),
        (pl.Float32, pl.Float32),
        (pl.Float64, pl.Float64),
    ],
)
def test_horizontal_mean_with_null_col_ignore_strategy(
    dtype_in: PolarsDataType,
    dtype_out: PolarsDataType,
    ignore_nulls: bool,
) -> None:
    """mean_horizontal with a null column, under both null-handling strategies."""
    columns = {
        "null": [None, None, None],
        "s": pl.Series([1, 0, 1], dtype=dtype_in, strict=False),
        "s2": pl.Series([1, 0, None], dtype=dtype_in, strict=False),
    }
    result = pl.LazyFrame(columns).select(
        pl.mean_horizontal("null", "s", "s2", ignore_nulls=ignore_nulls)
    )

    # all-null output unless nulls are ignored and the inputs carry real values
    values: list[Any] = [None, None, None]
    if ignore_nulls and dtype_in != pl.Null:
        values = [1, 0, 1]

    expected = pl.LazyFrame(pl.Series("null", values, dtype=dtype_out))
    assert_frame_equal(result, expected)
661

662

663
def test_raise_invalid_types_21835() -> None:
    """min_horizontal across an integer and a string column raises ComputeError."""
    df = pl.DataFrame({"x": [1, 2], "y": ["three", "four"]})
    message = r"cannot compare string with numeric type \(i64\)"

    with pytest.raises(ComputeError, match=message):
        df.select(pl.min_horizontal("x", "y"))
671

672
Product

Resources

Company