CoCalc -- test_errors.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/meta/test_errors.py
⁸⁴¹⁰ views
1
from __future__ import annotations
2

3
import io
4
from datetime import date, datetime, time, tzinfo
5
from decimal import Decimal
6
from typing import TYPE_CHECKING, Any
7

8
import numpy as np
9
import pandas as pd
10
import pytest
11

12
import polars as pl
13
from polars.datatypes.convert import dtype_to_py_type
14
from polars.exceptions import (
15
    ColumnNotFoundError,
16
    ComputeError,
17
    InvalidOperationError,
18
    OutOfBoundsError,
19
    SchemaError,
20
    SchemaFieldNotFoundError,
21
    ShapeError,
22
    StructFieldNotFoundError,
23
)
24
from polars.testing import assert_frame_equal
25
from tests.unit.conftest import TEMPORAL_DTYPES
26

27
if TYPE_CHECKING:
28
    from polars._typing import ConcatMethod
29

30

31
def test_error_on_empty_group_by() -> None:
    """Grouping by an empty key list must raise a `ComputeError`."""
    expected_msg = "at least one key is required in a group_by operation"
    with pytest.raises(ComputeError, match=expected_msg):
        frame = pl.DataFrame({"x": [0, 0, 1, 1]})
        frame.group_by([]).agg(pl.len())
36

37

38
def test_error_on_reducing_map() -> None:
    """Exercise `map_batches` calls that must raise, then one that must succeed.

    The first two queries are expected to fail with a `TypeError` mentioning
    `returns_scalar=False`; the last one unnests the struct *after* the
    `map_batches` call and is compared against a known result.
    """
    expected_msg = r"`map` with `returns_scalar=False`"

    grouped_input = pl.DataFrame(
        {"id": [0, 0, 0, 1, 1, 1], "t": [2, 4, 5, 10, 11, 14], "y": [0, 1, 1, 2, 3, 4]}
    )
    with pytest.raises(TypeError, match=expected_msg):
        mean_expr = pl.map_batches(["t", "y"], np.mean, return_dtype=pl.Float64)
        grouped_input.group_by("id").agg(mean_expr)

    df = pl.DataFrame({"x": [1, 2, 3, 4], "group": [1, 2, 1, 2]})
    with pytest.raises(TypeError, match=expected_msg):
        struct_dtype = pl.Struct({"breakpoint": pl.Int64, "cat": pl.Categorical()})
        unnest_in_udf = pl.col("x").map_batches(
            lambda x: x.cut(breaks=[1, 2, 3], include_breaks=True).struct.unnest(),
            is_elementwise=True,
            return_dtype=struct_dtype,
        )
        df.select(unnest_in_udf.over("group"))

    # Positive case: the struct is unnested outside of the user function.
    cut_then_unnest = (
        pl.col("x")
        .map_batches(
            lambda x: x.cut(breaks=[1, 2, 3], include_breaks=True),
            is_elementwise=True,
        )
        .struct.unnest()
        .over("group")
    )
    actual = df.select(cut_then_unnest)
    expected = pl.DataFrame(
        {
            "breakpoint": [1.0, 2.0, 3.0, float("inf")],
            "category": ["(-inf, 1]", "(1, 2]", "(2, 3]", "(3, inf]"],
        },
        schema_overrides={"category": pl.Categorical()},
    )
    assert_frame_equal(actual, expected)
82

83

84
def test_error_on_invalid_by_in_asof_join() -> None:
    """An asof join whose `by` column dtypes differ must raise `ComputeError`."""
    left = pl.DataFrame(
        {
            "a": ["a", "b", "a"],
            "b": [1, 2, 3],
            "c": ["a", "b", "a"],
        }
    ).set_sorted("b")

    # Same data, but column "a" is cast to Categorical on the right-hand side.
    right = left.with_columns(pl.col("a").cast(pl.Categorical))
    with pytest.raises(ComputeError):
        left.join_asof(right, on="b", by=["a", "c"])
96

97

98
@pytest.mark.parametrize("dtype", TEMPORAL_DTYPES)
def test_error_on_invalid_series_init(dtype: pl.DataType) -> None:
    """Float values must not initialise a Series of a temporal dtype."""
    expected_py_name = dtype_to_py_type(dtype).__name__
    expected_msg = f"'float' object cannot be interpreted as a {expected_py_name!r}"
    with pytest.raises(TypeError, match=expected_msg):
        pl.Series([1.5, 2.0, 3.75], dtype=dtype)
106

107

108
def test_error_on_invalid_series_init2() -> None:
    """Float values must not initialise an `Int32` Series."""
    floats = [1.5, 2.0, 3.75]
    with pytest.raises(TypeError, match="unexpected value"):
        pl.Series(floats, dtype=pl.Int32)
111

112

113
def test_error_on_invalid_struct_field() -> None:
    """Requesting a struct field that does not exist must raise."""
    with pytest.raises(StructFieldNotFoundError):
        members = [pl.Series("a", [1, 2]), pl.Series("b", ["a", "b"])]
        struct_series = pl.struct(members, eager=True)
        struct_series.struct.field("z")
118

119

120
def test_not_found_error() -> None:
    """Selecting a column absent from a CSV-backed frame must raise."""
    source = io.StringIO("a,b,c\n2,1,1")
    frame = pl.read_csv(source)
    with pytest.raises(ColumnNotFoundError):
        frame.select("d")
125

126

127
def test_string_numeric_comp_err() -> None:
    """Comparing a numeric column with a string literal must raise."""
    expected_msg = "cannot compare string with numeric type"
    with pytest.raises(ComputeError, match=expected_msg):
        numbers = pl.DataFrame({"a": [1.1, 21, 31, 21, 51, 61, 71, 81]})
        numbers.select(pl.col("a") < "9")
130

131

132
def test_panic_error() -> None:
    """An unsupported interval unit in `datetime_range` must raise."""
    with pytest.raises(InvalidOperationError, match="unit: 'k' not supported"):
        first = datetime(2021, 12, 16)
        last = datetime(2021, 12, 16, 3)
        pl.datetime_range(start=first, end=last, interval="99k", eager=True)
143

144

145
def test_join_lazy_on_df() -> None:
    """Lazy joins must reject eager polars frames and pandas frames as `other`."""
    df_left = pl.DataFrame({"Id": [1, 2, 3, 4], "Names": ["A", "B", "C", "D"]})
    df_right = pl.DataFrame({"Id": [1, 3], "Tags": ["xxx", "yyy"]})

    polars_msg = r"expected `other` .*to be a 'LazyFrame'.* not 'DataFrame'"
    pandas_msg = r"expected `other` .*to be a 'LazyFrame'.* not 'pandas.*DataFrame'"

    with pytest.raises(TypeError, match=polars_msg):
        df_left.lazy().join(df_right, on="Id")  # type: ignore[arg-type]

    with pytest.raises(TypeError, match=polars_msg):
        df_left.lazy().join_asof(df_right, on="Id")  # type: ignore[arg-type]

    with pytest.raises(TypeError, match=pandas_msg):
        df_left.lazy().join_asof(df_right.to_pandas(), on="Id")  # type: ignore[arg-type]
171

172

173
def test_projection_update_schema_missing_column() -> None:
    """Filtering on a missing column in a lazy query must name that column."""
    with pytest.raises(ColumnNotFoundError, match='unable to find column "colC"'):
        lf = pl.DataFrame({"colA": ["a", "b", "c"], "colB": [1, 2, 3]}).lazy()
        query = (
            lf.filter(~pl.col("colC").is_null())
            .group_by(["colA"])
            .agg([pl.col("colB").sum().alias("result")])
        )
        query.collect()
186

187

188
def test_not_found_on_rename() -> None:
    """Renaming or selecting a non-existent column must raise a not-found error."""
    df = pl.DataFrame({"exists": [1, 2, 3]})
    missing_errors = (SchemaFieldNotFoundError, ColumnNotFoundError)

    with pytest.raises(missing_errors):
        df.rename({"does_not_exist": "exists"})

    with pytest.raises(missing_errors):
        aliased = pl.col("does_not_exist").alias("new_name")
        df.select(aliased)
197

198

199
def test_getitem_errs() -> None:
    """Indexing or item assignment with a `set` key must raise `TypeError`."""
    df = pl.DataFrame({"a": [1, 2, 3]})

    frame_getitem_msg = r"cannot select columns using key of type 'set': {'some'}"
    series_getitem_msg = r"cannot select elements using key of type 'set': {'strange'}"
    frame_setitem_msg = r"cannot use `__setitem__` on DataFrame with key {'some'} of type 'set' and value 'foo' of type 'str'"

    with pytest.raises(TypeError, match=frame_getitem_msg):
        df[{"some"}]  # type: ignore[call-overload]

    with pytest.raises(TypeError, match=series_getitem_msg):
        df["a"][{"strange"}]  # type: ignore[call-overload]

    with pytest.raises(TypeError, match=frame_setitem_msg):
        df[{"some"}] = "foo"  # type: ignore[index]
219

220

221
def test_err_bubbling_up_to_lit() -> None:
    """Filtering with `pl.Date("...")` used as a value must raise `TypeError`."""
    frame = pl.DataFrame({"date": [date(2020, 1, 1)], "value": [42]})

    with pytest.raises(TypeError):
        predicate = pl.col("date") == pl.Date("2020-01-01")  # type: ignore[call-arg,operator]
        frame.filter(predicate)
226

227

228
def test_filter_not_of_type_bool() -> None:
    """A filter predicate built from `json_path_match` must be rejected."""
    df = pl.DataFrame({"json_val": ['{"a":"hello"}', None, '{"a":"world"}']})
    expected_msg = "filter predicate must be of type `Boolean`, got"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        non_boolean = pl.col("json_val").str.json_path_match("$.a")
        df.filter(non_boolean)
234

235

236
def test_is_nan_on_non_boolean() -> None:
    """Calling `fill_nan` on a string Series must raise `InvalidOperationError`."""
    with pytest.raises(InvalidOperationError):
        strings = pl.Series(["1", "2", "3"])
        strings.fill_nan("2")  # type: ignore[arg-type]
239

240

241
@pytest.mark.may_fail_cloud  # reason: eager - return_dtype must be set
def test_window_expression_different_group_length() -> None:
    """Validate the `ShapeError` message of a window expression, if one is raised.

    NOTE(review): only the message content is checked; when the query raises
    no `ShapeError` at all, this test passes without asserting anything.
    """
    expected_fragments = (
        "the length of the window expression did not match that of the group",
        "group:",
        "group length:",
        "output: 'shape:",
    )
    try:
        frame = pl.DataFrame({"groups": ["a", "a", "b", "a", "b"]})
        udf_expr = pl.col("groups").map_elements(lambda _: pl.Series([1, 2]))
        frame.select(udf_expr.over("groups"))
    except ShapeError as exc:
        message = str(exc)
        for fragment in expected_fragments:
            assert fragment in message
255

256

257
def test_invalid_concat_type_err() -> None:
    """An unknown `how` strategy passed to `pl.concat` must raise `ValueError`."""
    df = pl.DataFrame({"foo": [1, 2], "bar": [6, 7], "ham": ["a", "b"]})
    expected_msg = r"DataFrame `how` must be one of {'vertical', '.+', 'align_right'}, got 'sausage'"
    with pytest.raises(ValueError, match=expected_msg):
        pl.concat([df, df], how="sausage")  # type: ignore[arg-type]
270

271

272
@pytest.mark.parametrize("how", ["horizontal", "diagonal"])
def test_series_concat_err(how: ConcatMethod) -> None:
    """Concatenating Series with a non-vertical strategy must raise."""
    s = pl.Series([1, 2, 3])
    expected_msg = "Series only supports 'vertical' concat strategy"
    with pytest.raises(ValueError, match=expected_msg):
        pl.concat([s, s], how=how)
280

281

282
def test_invalid_sort_by() -> None:
    """Sorting a filtered column by an unfiltered one must raise `ShapeError`."""
    df = pl.DataFrame(
        {
            "a": ["bill", "bob", "jen", "allie", "george"],
            "b": ["M", "M", "F", "F", "M"],
            "c": [32, 40, 20, 19, 39],
        }
    )

    # `select a where b order by c desc`
    with pytest.raises(ShapeError):
        filtered_names = pl.col("a").filter(pl.col("b") == "M")
        df.select(filtered_names.sort_by("c", descending=True))
294

295

296
def test_epoch_time_type() -> None:
    """`dt.epoch` on a Series of `time` values must raise."""
    expected_msg = "`timestamp` operation not supported for dtype `time`"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        times = pl.Series([time(0, 0, 1)])
        times.dt.epoch("s")
302

303

304
def test_duplicate_columns_arg_csv() -> None:
    """`read_csv` must reject a `columns` argument that repeats a name."""
    buffer = io.BytesIO()
    pl.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}).write_csv(buffer)
    buffer.seek(0)  # rewind so that `read_csv` starts at the beginning

    expected_msg = r"`columns` arg should only have unique values"
    with pytest.raises(ValueError, match=expected_msg):
        pl.read_csv(buffer, columns=["x", "x", "y"])
312

313

314
def test_datetime_time_add_err() -> None:
    """Adding a `time` Series to a `datetime` Series must raise `SchemaError`."""
    with pytest.raises(SchemaError, match="failed to determine supertype"):
        timestamps = pl.Series([datetime(1970, 1, 1, 0, 0, 1)])
        clock_times = pl.Series([time(0, 0, 2)])
        timestamps + clock_times
317

318

319
def test_invalid_dtype() -> None:
    """Unparseable `dtype` arguments to `pl.Series` must raise `TypeError`."""
    str_dtype_msg = r"cannot parse input of type 'str' into Polars data type \(given: 'mayonnaise'\)"
    class_dtype_msg = r"cannot parse input <class 'datetime\.tzinfo'> into Polars data type"

    with pytest.raises(TypeError, match=str_dtype_msg):
        pl.Series([1, 2], dtype="mayonnaise")  # type: ignore[arg-type]

    with pytest.raises(TypeError, match=class_dtype_msg):
        pl.Series([None], dtype=tzinfo)  # type: ignore[arg-type]
331

332

333
def test_arr_eval_named_cols() -> None:
    """Referencing a named column inside `list.eval` must raise `ComputeError`."""
    df = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]})

    with pytest.raises(ComputeError):
        inner = pl.element().append(pl.col("A"))
        df.select(pl.col("B").list.eval(inner))
338

339

340
def test_alias_in_join_keys() -> None:
    """An aliased expression used as a join key must be rejected."""
    df = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]})
    expected_msg = r"'alias' is not allowed in a join key, use 'with_columns' first"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        aliased_key = pl.col("A").alias("foo")
        df.join(df, on=aliased_key)
347

348

349
def test_sort_by_different_lengths() -> None:
    """`sort_by` keys whose per-group length differs must raise `ShapeError`.

    Three sort keys are tried in turn: `unique`, `arg_unique` and `first`.
    """
    df = pl.DataFrame(
        {
            "group": ["a"] * 3 + ["b"] * 3,
            "col1": [1, 2, 3, 300, 200, 100],
            "col2": [1, 2, 3, 300, 1, 1],
        }
    )
    mismatch_msg = r"expressions in 'sort_by' must have matching group lengths"

    with pytest.raises(ShapeError, match=mismatch_msg):
        unique_key = pl.col("col2").unique()
        df.group_by("group").agg([pl.col("col1").sort_by(unique_key)])

    with pytest.raises(ShapeError, match=mismatch_msg):
        arg_unique_key = pl.col("col2").arg_unique()
        df.group_by("group").agg([pl.col("col1").sort_by(arg_unique_key)])

    with pytest.raises(ShapeError, match=mismatch_msg):
        first_key = pl.col("col2").first()
        df.group_by("group").agg([pl.col("col1").sort_by(first_key)])
386

387

388
def test_err_filter_no_expansion() -> None:
    """A dtype selector matching no column must make `filter` raise."""
    # The frame only holds floats ...
    df = pl.DataFrame({"a": [0.1, 0.2]})

    expected_msg = r"The predicate expanded to zero expressions"
    with pytest.raises(ComputeError, match=expected_msg):
        # ... while the predicate selects Int16 columns.
        int_predicate = pl.col(pl.Int16).min() < 0.1
        df.filter(int_predicate)
401

402

403
@pytest.mark.parametrize(
    "e",
    [
        pl.col("date") > "2021-11-10",
        pl.col("date") < "2021-11-10",
    ],
)
def test_date_string_comparison(e: pl.Expr) -> None:
    """Comparing a `Date` column with a string literal must raise."""
    raw = pl.DataFrame({"date": ["2022-11-01", "2022-11-02", "2022-11-05"]})
    df = raw.with_columns(pl.col("date").str.strptime(pl.Date, "%Y-%m-%d"))

    expected_msg = r"cannot compare 'date/datetime/time' to a string value"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        df.select(e)
426

427

428
def test_compare_different_len() -> None:
    """Comparing a 5-row column with a 3-element Series must raise `ShapeError`."""
    df = pl.DataFrame({"idx": list(range(5))})
    s = pl.Series([2, 5, 8])

    with pytest.raises(ShapeError):
        df.filter(pl.col("idx") == s)
438

439

440
def test_take_negative_index_is_oob() -> None:
    """Gathering index -4 from a 3-element column must raise `OutOfBoundsError`."""
    df = pl.DataFrame({"value": [1, 2, 3]})
    with pytest.raises(OutOfBoundsError):
        values = df["value"]
        values.gather(-4)
444

445

446
def test_string_numeric_arithmetic_err() -> None:
    """Adding an integer to a string column must raise."""
    df = pl.DataFrame({"s": ["x"]})
    expected_msg = r"arithmetic on string and numeric not allowed"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        df.select(pl.col("s") + 1)
452

453

454
def test_ambiguous_filter_err() -> None:
    """A filter predicate expanding to several columns must raise."""
    df = pl.DataFrame({"a": [None, "2", "3"], "b": [None, None, "z"]})
    expected_msg = r"The predicate passed to 'LazyFrame.filter' expanded to multiple expressions"
    with pytest.raises(ComputeError, match=expected_msg):
        multi_column_predicate = pl.col(["a", "b"]).is_null()
        df.filter(multi_column_predicate)
461

462

463
def test_with_column_duplicates() -> None:
    """Aliasing every column to the same name in `with_columns` must raise."""
    df = pl.DataFrame({"a": [0, None, 2, 3, None], "b": [None, 1, 2, 3, None]})
    expected_msg = r"the name 'same' passed to `LazyFrame.with_columns` is duplicate.*"
    with pytest.raises(ComputeError, match=expected_msg):
        # Only the raising call belongs here: an assertion on its result could
        # never be evaluated, because the call is expected to raise first.
        df.with_columns([pl.all().alias("same")])
470

471

472
@pytest.mark.may_fail_cloud  # reason: eager - return_dtype must be set
def test_skip_nulls_err() -> None:
    """`map_elements` without a return dtype on an all-null column must raise."""
    df = pl.DataFrame({"foo": [None, None]})
    expected_msg = r"UDF called without return type, but was not able to infer the output type"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        identity_udf = pl.col("foo").map_elements(lambda x: x, skip_nulls=True)
        df.with_columns(identity_udf)
480

481

482
@pytest.mark.parametrize(
    ("test_df", "type", "expected_message"),
    [
        pytest.param(
            pl.DataFrame({"A": [1, 2, 3], "B": ["1", "2", "help"]}),
            pl.UInt32,
            "conversion .* failed",
            id="Unsigned integer",
        )
    ],
)
def test_cast_err_column_value_highlighting(
    test_df: pl.DataFrame, type: pl.DataType, expected_message: str
) -> None:
    """Casting all columns to `type` must fail with `expected_message`."""
    with pytest.raises(InvalidOperationError, match=expected_message):
        cast_everything = pl.all().cast(type)
        test_df.with_columns(cast_everything)
498

499

500
def test_invalid_group_by_arg() -> None:
    """Passing a dictionary of aggregations to `agg` must raise `TypeError`."""
    df = pl.DataFrame({"a": [1]})
    expected_msg = "specifying aggregations as a dictionary is not supported"
    with pytest.raises(TypeError, match=expected_msg):
        df.group_by(1).agg({"a": "sum"})
506

507

508
def test_overflow_msg() -> None:
    """Building an `Int32` column from 2**31 must raise with the offending value."""
    expected_msg = r"could not append value: 2147483648 of type: i64 to the builder"
    with pytest.raises(ComputeError, match=expected_msg):
        too_large = 2**31
        pl.DataFrame([[too_large]], [("a", pl.Int32)], orient="row")
514

515

516
def test_sort_by_err_9259() -> None:
    """Sorting by a key filtered down to no rows must raise `ShapeError`."""
    df = pl.DataFrame(
        {"a": [1, 1, 1], "b": [3, 2, 1], "c": [1, 1, 2]},
        schema={"a": pl.Float32, "b": pl.Float32, "c": pl.Float32},
    )
    with pytest.raises(ShapeError):
        # No value of "b" exceeds 100, so the sort key is empty in every group.
        filtered_key = pl.col("b").filter(pl.col("b") > 100)
        query = df.lazy().group_by("c").agg([pl.col("a").sort_by(filtered_key).sum()])
        query.collect()
525

526

527
def test_empty_inputs_error() -> None:
    """`sum_horizontal` over a selection excluding the only column must raise."""
    df = pl.DataFrame({"col1": [1]})
    with pytest.raises(InvalidOperationError, match="expected at least 1 input"):
        nothing_selected = pl.exclude("col1")
        df.select(pl.sum_horizontal(nothing_selected))
533

534

535
@pytest.mark.parametrize(
    ("colname", "values", "expected"),
    [
        ("a", [2], [False, True, False]),
        ("a", [True, False], None),
        ("a", ["2", "3", "4"], None),
        ("b", [Decimal("3.14")], None),
        ("c", [-2, -1, 0, 1, 2], None),
        (
            "d",
            pl.datetime_range(
                datetime.now(),
                datetime.now(),
                interval="2345ns",
                time_unit="ns",
                eager=True,
            ),
            None,
        ),
        ("d", [time(10, 30)], None),
        ("e", [datetime(1999, 12, 31, 10, 30)], None),
        ("f", ["xx", "zz"], None),
    ],
)
def test_invalid_is_in_dtypes(
    colname: str, values: list[Any], expected: list[Any] | None
) -> None:
    """Check `is_in` against compatible and incompatible value dtypes.

    When `expected` is a list, the membership mask must equal it; when it is
    `None`, the query must raise `InvalidOperationError`.
    """
    df = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [-2.5, 0.0, 2.5],
            "c": [True, None, False],
            "d": [datetime(2001, 10, 30), None, datetime(2009, 7, 5)],
            "e": [date(2029, 12, 31), date(1999, 12, 31), None],
            "f": [b"xx", b"yy", b"zz"],
        }
    )

    if expected is not None:
        mask = df.select(pl.col(colname).is_in(values))
        assert mask[colname].to_list() == expected
        return

    expected_msg = r"'is_in' cannot check for .*? values in .*? data"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        df.select(pl.col(colname).is_in(values))
580

581

582
def test_sort_by_error() -> None:
    """Sorting a filtered column by an unfiltered one per group must raise."""
    df = pl.DataFrame(
        {
            "id": [1, 1, 1, 2, 2, 3, 3, 3],
            "number": [1, 3, 2, 1, 2, 2, 1, 3],
            "type": ["A", "B", "A", "B", "B", "A", "B", "C"],
            "cost": [10, 25, 20, 25, 30, 30, 50, 100],
        }
    )

    expected_msg = "expressions in 'sort_by' must have matching group lengths"
    with pytest.raises(ShapeError, match=expected_msg):
        type_a_costs = pl.col("cost").filter(pl.col("type") == "A")
        df.group_by("id", maintain_order=True).agg(type_a_costs.sort_by("number"))
599

600

601
def test_non_existent_expr_inputs_in_lazy() -> None:
    """`explain` must raise when a lazy query references an unknown column."""
    with pytest.raises(ColumnNotFoundError):
        empty_query = pl.LazyFrame().filter(pl.col("x") == 1)
        empty_query.explain()  # tests: 12074

    lf = pl.LazyFrame({"foo": [1, 1, -2, 3]})

    with pytest.raises(ColumnNotFoundError):
        # After the select only "bar" exists, so "foo" can no longer be resolved.
        only_bar = lf.select(pl.col("foo").cum_sum().alias("bar"))
        only_bar.filter(pl.col("bar") == pl.col("foo")).explain()
613

614

615
def test_error_list_to_array() -> None:
    """`list.to_array(2)` on lists of differing length must raise."""
    expected_msg = "not all elements have the specified width"
    with pytest.raises(ComputeError, match=expected_msg):
        ragged = pl.DataFrame(
            data={"a": [[1, 2], [3, 4, 5]]}, schema={"a": pl.List(pl.Int8)}
        )
        ragged.with_columns(array=pl.col("a").list.to_array(2))
620

621

622
def test_raise_not_found_in_simplify_14974() -> None:
    """An arithmetic expression on a missing column must raise on an empty frame."""
    df = pl.DataFrame()
    with pytest.raises(ColumnNotFoundError):
        reciprocal = 1 / (1 + pl.col("a"))
        df.select(reciprocal)
626

627

628
def test_invalid_product_type() -> None:
    """`product` on a Series of lists must raise `InvalidOperationError`."""
    expected_msg = "`product` operation not supported for dtype"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        nested = pl.Series([[1, 2, 3]])
        nested.product()
634

635

636
def test_fill_null_invalid_supertype() -> None:
    """Filling nulls of a date column with a float must raise."""
    df = pl.DataFrame({"date": [date(2022, 1, 1), None]})
    with pytest.raises(InvalidOperationError, match="got invalid or ambiguous"):
        filled = pl.col("date").fill_null(1.0)
        df.select(filled)
640

641

642
@pytest.mark.may_fail_cloud  # reason: Object type not supported
def test_raise_invalid_arithmetic() -> None:
    """Subtracting a column of Python objects from itself must raise."""
    objects = pl.Series("a", [object()])
    df = objects.to_frame()

    with pytest.raises(InvalidOperationError):
        difference = pl.col("a") - pl.col("a")
        df.select(difference)
648

649

650
def test_err_invalid_comparison() -> None:
    """Comparing Series of incompatible dtypes must raise a descriptive error."""
    date_vs_bool_msg = "could not evaluate comparison between series 'a' of dtype: Date and series 'b' of dtype: Boolean"
    object_msg = "could not apply comparison on series of dtype 'object; operand names: 'a', 'b'"

    with pytest.raises(SchemaError, match=date_vs_bool_msg):
        dates = pl.Series("a", [date(2020, 1, 1)])
        flags = pl.Series("b", [True])
        _ = dates == flags

    with pytest.raises(InvalidOperationError, match=object_msg):
        lhs = pl.Series("a", [object()])
        # NOTE(review): the right operand holds the `object` class itself rather
        # than an instance - confirm this is intended.
        rhs = pl.Series("b", [object])
        _ = lhs == rhs
662

663

664
def test_no_panic_pandas_nat() -> None:
    """Constructing a frame from `pd.NaT` must raise a regular exception."""
    # we don't want to support pd.nat, but don't want to panic.
    with pytest.raises(Exception):  # noqa: B017
        payload = {"x": [pd.NaT]}
        pl.DataFrame(payload)
668

669

670
def test_list_to_struct_invalid_type() -> None:
    """`list.to_struct` on a Series built from a scalar integer must raise."""
    with pytest.raises(InvalidOperationError):
        scalar_series = pl.DataFrame({"a": 1}).to_series()
        scalar_series.list.to_struct(fields=["a", "b"])
673

674

675
def test_raise_invalid_agg() -> None:
    """An aggregation filtering on a missing column must raise on collect."""
    with pytest.raises(ColumnNotFoundError):
        indexed = pl.LazyFrame({"foo": [1]}).with_row_index()
        query = indexed.group_by("index").agg(
            pl.col("foo").filter(pl.col("i_do_not_exist"))
        )
        query.collect()
683

684

685
def test_err_mean_horizontal_lists() -> None:
    """`mean_horizontal` over list columns must raise `InvalidOperationError`."""
    df = pl.DataFrame(
        {
            "experiment_id": [1, 2],
            "sensor1": [[1, 2, 3], [7, 8, 9]],
            "sensor2": [[4, 5, 6], [10, 11, 12]],
        }
    )
    with pytest.raises(InvalidOperationError):
        sensor_mean = pl.mean_horizontal("sensor1", "sensor2")
        df.with_columns(sensor_mean.alias("avg_sensor"))
695

696

697
def test_raise_column_not_found_in_join_arg() -> None:
    """Joining on a key that only exists in the right frame must raise."""
    left = pl.DataFrame({"x": [1, 2, 3]})
    right = pl.DataFrame({"y": [1, 2, 3]})
    with pytest.raises(ColumnNotFoundError):
        left.join(right, on="y")
702

703

704
def test_raise_on_different_results_20104() -> None:
    """A rolling `map_batches` after `gather_every` must raise `TypeError`."""
    df = pl.DataFrame({"x": [1, 2]})

    with pytest.raises(TypeError):
        min_of_gathered = (
            pl.col("x")
            .gather_every(2, offset=1)
            .map_batches(pl.Series.min, return_dtype=pl.Float64)
        )
        df.rolling("x", period="3i").agg(result=min_of_gathered)
713

714

715
@pytest.mark.parametrize("fill_value", [None, -1])
def test_shift_with_null_deprecated_24105(fill_value: Any) -> None:
    """Shifting by a null amount must warn and yield an all-null column."""
    df = pl.DataFrame({"x": [1, 2, 3]})
    expected_warning = r"shift value 'n' is null, which currently returns a column of null values. This will become an error in the future."

    shifted = None
    with pytest.deprecated_call(match=expected_warning):  # @2.0
        # No value of "x" exceeds 3, so `first()` of the filtered column is null.
        null_amount = pl.col.x.filter(pl.col.x > 3).first()
        shifted = df.select(pl.col.x.shift(null_amount, fill_value=fill_value))

    # Check that the result is a column of nulls, even if the fill_value is different
    all_null = pl.DataFrame({"x": [None, None, None]})
    assert_frame_equal(shifted, all_null, check_dtypes=False)
731

732

733
def test_raies_on_mismatch_column_length_24500() -> None:
    """A struct of per-group `head` results with differing lengths must raise."""
    df = pl.DataFrame(
        {
            "a": [10, 10, 10, 20, 20, 20],
            "b": [2, 2, 99, 3, 3, 3],
            "c": [3, 3, 3, 2, 2, 99],
        }
    )
    expected_msg = "expressions must have matching group lengths"
    with pytest.raises(ShapeError, match=expected_msg):
        # Each column is truncated to as many rows as its first value per group.
        head_b = pl.col("b").head(pl.col("b").first())
        head_c = pl.col("c").head(pl.col("c").first())
        df.group_by("a").agg(pl.struct(head_b, head_c))
751

752

753
def test_raies_on_mismatch_column_length_binary_expr() -> None:
    """Adding per-group `head` results with differing lengths must raise."""
    df = pl.DataFrame(
        {
            "a": [10, 10, 10, 20, 20, 20],
            "b": [2, 0, 99, 0, 0, 0],
            "c": [3, 0, 0, 2, 0, 99],
        }
    )

    expected_msg = "expressions must have matching group lengths"
    with pytest.raises(ShapeError, match=expected_msg):
        # Each column is truncated to as many rows as its first value per group.
        head_b = pl.col("b").head(pl.col("b").first())
        head_c = pl.col("c").head(pl.col("c").first())
        df.group_by("a").agg(pl.Expr.add(head_b, head_c))
772

773
Product

Resources

Company