Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_errors.py
6939 views
1
from __future__ import annotations
2
3
import io
4
from datetime import date, datetime, time, tzinfo
5
from decimal import Decimal
6
from typing import TYPE_CHECKING, Any
7
8
import numpy as np
9
import pandas as pd
10
import pytest
11
12
import polars as pl
13
from polars.datatypes.convert import dtype_to_py_type
14
from polars.exceptions import (
15
ColumnNotFoundError,
16
ComputeError,
17
InvalidOperationError,
18
OutOfBoundsError,
19
SchemaError,
20
SchemaFieldNotFoundError,
21
ShapeError,
22
StructFieldNotFoundError,
23
)
24
from polars.testing import assert_frame_equal
25
from tests.unit.conftest import TEMPORAL_DTYPES
26
27
if TYPE_CHECKING:
28
from polars._typing import ConcatMethod
29
30
31
def test_error_on_empty_group_by() -> None:
    """Grouping by an empty key list raises a `ComputeError`."""
    frame = pl.DataFrame({"x": [0, 0, 1, 1]})
    expected_msg = "at least one key is required in a group_by operation"
    with pytest.raises(ComputeError, match=expected_msg):
        frame.group_by([]).agg(pl.len())
36
37
38
def test_error_on_reducing_map() -> None:
    """Check the `TypeError` raised by `map_batches` in group-by and window contexts.

    The final assertion covers a window usage that is expected to succeed.
    """
    scalar_err = r"`map` with `returns_scalar=False`"

    grouped = pl.DataFrame(
        {"id": [0, 0, 0, 1, 1, 1], "t": [2, 4, 5, 10, 11, 14], "y": [0, 1, 1, 2, 3, 4]}
    )
    with pytest.raises(TypeError, match=scalar_err):
        grouped.group_by("id").agg(
            pl.map_batches(["t", "y"], np.mean, return_dtype=pl.Float64)
        )

    windowed = pl.DataFrame({"x": [1, 2, 3, 4], "group": [1, 2, 1, 2]})
    struct_dtype = pl.Struct({"breakpoint": pl.Int64, "cat": pl.Categorical()})
    with pytest.raises(TypeError, match=scalar_err):
        windowed.select(
            pl.col("x")
            .map_batches(
                lambda x: x.cut(breaks=[1, 2, 3], include_breaks=True).struct.unnest(),
                is_elementwise=True,
                return_dtype=struct_dtype,
            )
            .over("group")
        )

    # Unnesting on the expression (rather than inside the UDF) is accepted.
    actual = windowed.select(
        pl.col("x")
        .map_batches(
            lambda x: x.cut(breaks=[1, 2, 3], include_breaks=True),
            is_elementwise=True,
        )
        .struct.unnest()
        .over("group")
    )
    expected = pl.DataFrame(
        {
            "breakpoint": [1.0, 2.0, 3.0, float("inf")],
            "category": ["(-inf, 1]", "(1, 2]", "(2, 3]", "(3, inf]"],
        },
        schema_overrides={"category": pl.Categorical()},
    )
    assert_frame_equal(actual, expected)
82
83
84
def test_error_on_invalid_by_in_asof_join() -> None:
    """Asof-joining with a `by` column cast to Categorical on one side raises."""
    left = pl.DataFrame(
        {
            "a": ["a", "b", "a"],
            "b": [1, 2, 3],
            "c": ["a", "b", "a"],
        }
    ).set_sorted("b")
    # Same data, but column "a" is Categorical on the right-hand side.
    right = left.with_columns(pl.col("a").cast(pl.Categorical))

    with pytest.raises(ComputeError):
        left.join_asof(right, on="b", by=["a", "c"])
96
97
98
@pytest.mark.parametrize("dtype", TEMPORAL_DTYPES)
def test_error_on_invalid_series_init(dtype: pl.DataType) -> None:
    """Float values passed with a temporal dtype raise a descriptive `TypeError`."""
    py_type_name = dtype_to_py_type(dtype).__name__
    expected_msg = f"'float' object cannot be interpreted as a {py_type_name!r}"
    with pytest.raises(TypeError, match=expected_msg):
        pl.Series([1.5, 2.0, 3.75], dtype=dtype)
106
107
108
def test_error_on_invalid_series_init2() -> None:
    """Float values passed with an `Int32` dtype raise a `TypeError`."""
    float_values = [1.5, 2.0, 3.75]
    with pytest.raises(TypeError, match="unexpected value"):
        pl.Series(float_values, dtype=pl.Int32)
111
112
113
def test_error_on_invalid_struct_field() -> None:
    """Requesting a struct field that does not exist raises."""
    members = [pl.Series("a", [1, 2]), pl.Series("b", ["a", "b"])]
    with pytest.raises(StructFieldNotFoundError):
        pl.struct(members, eager=True).struct.field("z")
118
119
120
def test_not_found_error() -> None:
    """Selecting a column absent from a CSV-backed frame raises."""
    buffer = io.StringIO("a,b,c\n2,1,1")
    frame = pl.read_csv(buffer)
    with pytest.raises(ColumnNotFoundError):
        frame.select("d")
125
126
127
def test_string_numeric_comp_err() -> None:
    """Comparing a numeric column with a string literal raises a `ComputeError`."""
    frame = pl.DataFrame({"a": [1.1, 21, 31, 21, 51, 61, 71, 81]})
    expected_msg = "cannot compare string with numeric type"
    with pytest.raises(ComputeError, match=expected_msg):
        frame.select(pl.col("a") < "9")
130
131
132
def test_panic_error() -> None:
    """An interval using the unit 'k' is rejected with `InvalidOperationError`."""
    range_start = datetime(2021, 12, 16)
    range_end = datetime(2021, 12, 16, 3)
    with pytest.raises(InvalidOperationError, match="unit: 'k' not supported"):
        pl.datetime_range(
            start=range_start,
            end=range_end,
            interval="99k",
            eager=True,
        )
143
144
145
def test_join_lazy_on_df() -> None:
    """Passing an eager or pandas frame as `other` to a lazy join raises `TypeError`."""
    df_left = pl.DataFrame(
        {
            "Id": [1, 2, 3, 4],
            "Names": ["A", "B", "C", "D"],
        }
    )
    df_right = pl.DataFrame({"Id": [1, 3], "Tags": ["xxx", "yyy"]})

    eager_msg = "expected `other` .*to be a 'LazyFrame'.* not 'DataFrame'"
    pandas_msg = (
        "expected `other` .*to be a 'LazyFrame'.* not 'pandas.core.frame.DataFrame'"
    )

    with pytest.raises(TypeError, match=eager_msg):
        df_left.lazy().join(df_right, on="Id")  # type: ignore[arg-type]

    with pytest.raises(TypeError, match=eager_msg):
        df_left.lazy().join_asof(df_right, on="Id")  # type: ignore[arg-type]

    with pytest.raises(TypeError, match=pandas_msg):
        df_left.lazy().join_asof(df_right.to_pandas(), on="Id")  # type: ignore[arg-type]
171
172
173
def test_projection_update_schema_missing_column() -> None:
    """Filtering a lazy query on a missing column reports that column by name."""
    frame = pl.DataFrame({"colA": ["a", "b", "c"], "colB": [1, 2, 3]})
    expected_msg = 'unable to find column "colC"'
    with pytest.raises(ColumnNotFoundError, match=expected_msg):
        (
            frame.lazy()
            .filter(~pl.col("colC").is_null())
            .group_by(["colA"])
            .agg([pl.col("colB").sum().alias("result")])
            .collect()
        )
186
187
188
def test_not_found_on_rename() -> None:
    """Renaming or selecting a missing column raises a not-found error."""
    frame = pl.DataFrame({"exists": [1, 2, 3]})

    # Either exception type is accepted for both operations.
    not_found_errors = (SchemaFieldNotFoundError, ColumnNotFoundError)

    with pytest.raises(not_found_errors):
        frame.rename({"does_not_exist": "exists"})

    with pytest.raises(not_found_errors):
        frame.select(pl.col("does_not_exist").alias("new_name"))
197
198
199
def test_getitem_errs() -> None:
    """Indexing or item-assigning with a `set` key raises a descriptive `TypeError`."""
    frame = pl.DataFrame({"a": [1, 2, 3]})

    column_msg = r"cannot select columns using key of type 'set': {'some'}"
    element_msg = r"cannot select elements using key of type 'set': {'strange'}"
    setitem_msg = r"cannot use `__setitem__` on DataFrame with key {'some'} of type 'set' and value 'foo' of type 'str'"

    with pytest.raises(TypeError, match=column_msg):
        frame[{"some"}]  # type: ignore[call-overload]

    with pytest.raises(TypeError, match=element_msg):
        frame["a"][{"strange"}]  # type: ignore[call-overload]

    with pytest.raises(TypeError, match=setitem_msg):
        frame[{"some"}] = "foo"  # type: ignore[index]
219
220
221
def test_err_bubbling_up_to_lit() -> None:
    """Comparing against `pl.Date("...")` in a filter raises `TypeError`."""
    frame = pl.DataFrame({"date": [date(2020, 1, 1)], "value": [42]})

    with pytest.raises(TypeError):
        frame.filter(pl.col("date") == pl.Date("2020-01-01"))  # type: ignore[call-arg,operator]
226
227
228
def test_error_on_double_agg() -> None:
    """Applying a second aggregation on top of `min()` in `agg` raises."""
    second_aggregations = (
        "mean",
        "max",
        "min",
        "sum",
        "std",
        "var",
        "n_unique",
        "last",
        "first",
        "median",
        "skew",  # this one comes from Apply
    )
    frame = pl.DataFrame(
        {
            "a": [1, 1, 1, 2, 2],
            "b": [1, 2, 3, 4, 5],
        }
    )
    for agg_name in second_aggregations:
        with pytest.raises(ComputeError, match="the column is already aggregated"):
            frame.group_by("a").agg([getattr(pl.col("b").min(), agg_name)()])
253
254
255
def test_filter_not_of_type_bool() -> None:
    """A filter predicate built from `json_path_match` is rejected as non-Boolean."""
    frame = pl.DataFrame({"json_val": ['{"a":"hello"}', None, '{"a":"world"}']})
    expected_msg = "filter predicate must be of type `Boolean`, got"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        frame.filter(pl.col("json_val").str.json_path_match("$.a"))
261
262
263
def test_is_nan_on_non_boolean() -> None:
    """`fill_nan` on a string Series raises `InvalidOperationError`."""
    strings = pl.Series(["1", "2", "3"])
    with pytest.raises(InvalidOperationError):
        strings.fill_nan("2")  # type: ignore[arg-type]
266
267
268
@pytest.mark.may_fail_cloud  # reason: eager - return_dtype must be set
def test_window_expression_different_group_length() -> None:
    """Inspect the `ShapeError` message for a window/group length mismatch.

    NOTE(review): nothing here fails when no `ShapeError` is raised, so the
    test passes vacuously in that case -- confirm this is intended.
    """
    frame = pl.DataFrame({"groups": ["a", "a", "b", "a", "b"]})
    expected_fragments = (
        "the length of the window expression did not match that of the group",
        "group:",
        "group length:",
        "output: 'shape:",
    )
    try:
        frame.select(
            pl.col("groups").map_elements(lambda _: pl.Series([1, 2])).over("groups")
        )
    except ShapeError as exc:
        msg = str(exc)
        for fragment in expected_fragments:
            assert fragment in msg
282
283
284
def test_invalid_concat_type_err() -> None:
    """An unknown `how` strategy for DataFrame concat raises `ValueError`."""
    frame = pl.DataFrame(
        {
            "foo": [1, 2],
            "bar": [6, 7],
            "ham": ["a", "b"],
        }
    )
    expected_msg = "DataFrame `how` must be one of {'vertical', '.+', 'align_right'}, got 'sausage'"
    with pytest.raises(ValueError, match=expected_msg):
        pl.concat([frame, frame], how="sausage")  # type: ignore[arg-type]
297
298
299
@pytest.mark.parametrize("how", ["horizontal", "diagonal"])
def test_series_concat_err(how: ConcatMethod) -> None:
    """Concatenating Series with a non-vertical strategy raises `ValueError`."""
    series = pl.Series([1, 2, 3])
    expected_msg = "Series only supports 'vertical' concat strategy"
    with pytest.raises(ValueError, match=expected_msg):
        pl.concat([series, series], how=how)
307
308
309
def test_invalid_sort_by() -> None:
    """Sorting a filtered column by an unfiltered one raises `ShapeError`."""
    frame = pl.DataFrame(
        {
            "a": ["bill", "bob", "jen", "allie", "george"],
            "b": ["M", "M", "F", "F", "M"],
            "c": [32, 40, 20, 19, 39],
        }
    )

    # `select a where b order by c desc`
    is_male = pl.col("b") == "M"
    with pytest.raises(ShapeError):
        frame.select(pl.col("a").filter(is_male).sort_by("c", descending=True))
321
322
323
def test_epoch_time_type() -> None:
    """`dt.epoch` on a Series of `time` values raises `InvalidOperationError`."""
    times = pl.Series([time(0, 0, 1)])
    expected_msg = "`timestamp` operation not supported for dtype `time`"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        times.dt.epoch("s")
329
330
331
def test_duplicate_columns_arg_csv() -> None:
    """`read_csv` rejects a `columns` argument containing duplicates."""
    buffer = io.BytesIO()
    source = pl.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})
    source.write_csv(buffer)
    buffer.seek(0)  # rewind so the reader starts at the header

    expected_msg = r"`columns` arg should only have unique values"
    with pytest.raises(ValueError, match=expected_msg):
        pl.read_csv(buffer, columns=["x", "x", "y"])
339
340
341
def test_datetime_time_add_err() -> None:
    """Adding a `time` Series to a `datetime` Series raises `SchemaError`."""
    datetimes = pl.Series([datetime(1970, 1, 1, 0, 0, 1)])
    times = pl.Series([time(0, 0, 2)])
    with pytest.raises(SchemaError, match="failed to determine supertype"):
        datetimes + times
344
345
346
def test_invalid_dtype() -> None:
    """Unparseable `dtype` arguments to `pl.Series` raise `TypeError`."""
    string_msg = r"cannot parse input of type 'str' into Polars data type \(given: 'mayonnaise'\)"
    class_msg = "cannot parse input <class 'datetime.tzinfo'> into Polars data type"

    with pytest.raises(TypeError, match=string_msg):
        pl.Series([1, 2], dtype="mayonnaise")  # type: ignore[arg-type]

    with pytest.raises(TypeError, match=class_msg):
        pl.Series([None], dtype=tzinfo)  # type: ignore[arg-type]
358
359
360
def test_arr_eval_named_cols() -> None:
    """Referencing a named column inside `list.eval` raises `ComputeError`."""
    frame = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]})

    with pytest.raises(ComputeError):
        frame.select(pl.col("B").list.eval(pl.element().append(pl.col("A"))))
365
366
367
def test_alias_in_join_keys() -> None:
    """Using an aliased expression as a join key raises `InvalidOperationError`."""
    frame = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]})
    expected_msg = r"'alias' is not allowed in a join key, use 'with_columns' first"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        frame.join(frame, on=pl.col("A").alias("foo"))
374
375
376
def test_sort_by_different_lengths() -> None:
    """`sort_by` keys built with `unique`/`arg_unique` raise in a group-by `agg`."""
    frame = pl.DataFrame(
        {
            "group": ["a"] * 3 + ["b"] * 3,
            "col1": [1, 2, 3, 300, 200, 100],
            "col2": [1, 2, 3, 300, 1, 1],
        }
    )
    expected_msg = r"expressions in 'sort_by' must have matching group lengths"
    sort_keys = (
        pl.col("col2").unique(),
        pl.col("col2").arg_unique(),
    )
    for sort_key in sort_keys:
        with pytest.raises(ComputeError, match=expected_msg):
            frame.group_by("group").agg(
                [
                    pl.col("col1").sort_by(sort_key),
                ]
            )
403
404
405
def test_err_filter_no_expansion() -> None:
    """A predicate selecting a dtype absent from the frame raises `ComputeError`."""
    # The frame holds only floats ...
    frame = pl.DataFrame(
        {
            "a": [0.1, 0.2],
        }
    )
    expected_msg = r"The predicate expanded to zero expressions"

    with pytest.raises(ComputeError, match=expected_msg):
        # ... while the predicate selects Int16 columns.
        frame.filter(pl.col(pl.Int16).min() < 0.1)
418
419
420
@pytest.mark.parametrize(
    "e",
    [
        pl.col("date") > "2021-11-10",
        pl.col("date") < "2021-11-10",
    ],
)
def test_date_string_comparison(e: pl.Expr) -> None:
    """Comparing a Date column with a string literal raises."""
    raw_dates = [
        "2022-11-01",
        "2022-11-02",
        "2022-11-05",
    ]
    frame = pl.DataFrame({"date": raw_dates}).with_columns(
        pl.col("date").str.strptime(pl.Date, "%Y-%m-%d")
    )

    expected_msg = r"cannot compare 'date/datetime/time' to a string value"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        frame.select(e)
443
444
445
def test_compare_different_len() -> None:
    """Comparing a 5-row column with a 3-element Series raises `ShapeError`."""
    frame = pl.DataFrame(
        {
            "idx": list(range(5)),
        }
    )
    shorter = pl.Series([2, 5, 8])

    with pytest.raises(ShapeError):
        frame.filter(pl.col("idx") == shorter)
455
456
457
def test_take_negative_index_is_oob() -> None:
    """Gathering index -4 from a 3-element column raises `OutOfBoundsError`."""
    frame = pl.DataFrame({"value": [1, 2, 3]})
    with pytest.raises(OutOfBoundsError):
        frame["value"].gather(-4)
461
462
463
def test_string_numeric_arithmetic_err() -> None:
    """Adding an integer to a string column raises `InvalidOperationError`."""
    frame = pl.DataFrame({"s": ["x"]})
    expected_msg = r"arithmetic on string and numeric not allowed"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        frame.select(pl.col("s") + 1)
469
470
471
def test_ambiguous_filter_err() -> None:
    """A filter predicate over several columns raises `ComputeError`."""
    frame = pl.DataFrame({"a": [None, "2", "3"], "b": [None, None, "z"]})
    expected_msg = r"The predicate passed to 'LazyFrame.filter' expanded to multiple expressions"
    with pytest.raises(ComputeError, match=expected_msg):
        frame.filter(pl.col(["a", "b"]).is_null())
478
479
480
def test_with_column_duplicates() -> None:
    """Aliasing every column to the same name in `with_columns` raises."""
    frame = pl.DataFrame({"a": [0, None, 2, 3, None], "b": [None, 1, 2, 3, None]})
    expected_msg = r"the name 'same' passed to `LazyFrame.with_columns` is duplicate.*"
    with pytest.raises(ComputeError, match=expected_msg):
        assert frame.with_columns([pl.all().alias("same")]).columns == ["a", "b", "same"]
487
488
489
@pytest.mark.may_fail_cloud  # reason: eager - return_dtype must be set
def test_skip_nulls_err() -> None:
    """`map_elements` over an all-null column without `return_dtype` raises."""
    frame = pl.DataFrame({"foo": [None, None]})
    expected_msg = (
        "UDF called without return type, but was not able to infer the output type."
    )
    with pytest.raises(pl.exceptions.InvalidOperationError, match=expected_msg):
        frame.with_columns(pl.col("foo").map_elements(lambda x: x, skip_nulls=True))
497
498
499
@pytest.mark.parametrize(
    ("test_df", "type", "expected_message"),
    [
        pytest.param(
            pl.DataFrame({"A": [1, 2, 3], "B": ["1", "2", "help"]}),
            pl.UInt32,
            "conversion .* failed",
            id="Unsigned integer",
        )
    ],
)
def test_cast_err_column_value_highlighting(
    test_df: pl.DataFrame, type: pl.DataType, expected_message: str
) -> None:
    """Casting every column to `type` fails with the expected message."""
    cast_everything = pl.all().cast(type)
    with pytest.raises(InvalidOperationError, match=expected_message):
        test_df.with_columns(cast_everything)
515
516
517
def test_lit_agg_err() -> None:
    """Summing a literal over a window raises `ComputeError`."""
    frame = pl.DataFrame({"y": [1]})
    with pytest.raises(ComputeError, match=r"cannot aggregate a literal"):
        frame.with_columns(pl.lit(1).sum().over("y"))
520
521
522
def test_invalid_group_by_arg() -> None:
    """Passing a dictionary of aggregations to `agg` raises `TypeError`."""
    frame = pl.DataFrame({"a": [1]})
    expected_msg = "specifying aggregations as a dictionary is not supported"
    with pytest.raises(TypeError, match=expected_msg):
        frame.group_by(1).agg({"a": "sum"})
528
529
530
def test_overflow_msg() -> None:
    """Building an `Int32` column from 2**31 reports the offending value."""
    rows = [[2**31]]
    schema = [("a", pl.Int32)]
    expected_msg = r"could not append value: 2147483648 of type: i64 to the builder"
    with pytest.raises(ComputeError, match=expected_msg):
        pl.DataFrame(rows, schema, orient="row")
536
537
538
def test_sort_by_err_9259() -> None:
    """`sort_by` with a filtered key inside a lazy group-by raises `ComputeError`."""
    frame = pl.DataFrame(
        {"a": [1, 1, 1], "b": [3, 2, 1], "c": [1, 1, 2]},
        schema={"a": pl.Float32, "b": pl.Float32, "c": pl.Float32},
    )
    # No value of "b" exceeds 100, so this key keeps no rows.
    filtered_key = pl.col("b").filter(pl.col("b") > 100)
    with pytest.raises(ComputeError):
        frame.lazy().group_by("c").agg(
            [pl.col("a").sort_by(filtered_key).sum()]
        ).collect()
547
548
549
def test_empty_inputs_error() -> None:
    """`sum_horizontal` over a selection that excludes the only column raises."""
    frame = pl.DataFrame({"col1": [1]})
    expected_msg = "expected at least 1 input"
    with pytest.raises(pl.exceptions.InvalidOperationError, match=expected_msg):
        frame.select(pl.sum_horizontal(pl.exclude("col1")))
555
556
557
@pytest.mark.parametrize(
    ("colname", "values", "expected"),
    [
        ("a", [2], [False, True, False]),
        ("a", [True, False], None),
        ("a", ["2", "3", "4"], None),
        ("b", [Decimal("3.14")], None),
        ("c", [-2, -1, 0, 1, 2], None),
        (
            "d",
            pl.datetime_range(
                datetime.now(),
                datetime.now(),
                interval="2345ns",
                time_unit="ns",
                eager=True,
            ),
            None,
        ),
        ("d", [time(10, 30)], None),
        ("e", [datetime(1999, 12, 31, 10, 30)], None),
        ("f", ["xx", "zz"], None),
    ],
)
def test_invalid_is_in_dtypes(
    colname: str, values: list[Any], expected: list[Any] | None
) -> None:
    """Check `is_in` per column: a result list when given, otherwise an error.

    `expected=None` marks a combination that must raise
    `InvalidOperationError`.
    """
    frame = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [-2.5, 0.0, 2.5],
            "c": [True, None, False],
            "d": [datetime(2001, 10, 30), None, datetime(2009, 7, 5)],
            "e": [date(2029, 12, 31), date(1999, 12, 31), None],
            "f": [b"xx", b"yy", b"zz"],
        }
    )
    if expected is not None:
        result = frame.select(pl.col(colname).is_in(values))
        assert result[colname].to_list() == expected
        return

    expected_msg = "'is_in' cannot check for .*? values in .*? data"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        frame.select(pl.col(colname).is_in(values))
602
603
604
def test_sort_by_error() -> None:
    """`sort_by` on a filtered column inside a group-by `agg` raises."""
    frame = pl.DataFrame(
        {
            "id": [1, 1, 1, 2, 2, 3, 3, 3],
            "number": [1, 3, 2, 1, 2, 2, 1, 3],
            "type": ["A", "B", "A", "B", "B", "A", "B", "C"],
            "cost": [10, 25, 20, 25, 30, 30, 50, 100],
        }
    )
    expected_msg = "expressions in 'sort_by' must have matching group lengths"

    with pytest.raises(ComputeError, match=expected_msg):
        frame.group_by("id", maintain_order=True).agg(
            pl.col("cost").filter(pl.col("type") == "A").sort_by("number")
        )
621
622
623
def test_non_existent_expr_inputs_in_lazy() -> None:
    """`explain` raises when a lazy query references a missing column."""
    with pytest.raises(ColumnNotFoundError):
        pl.LazyFrame().filter(pl.col("x") == 1).explain()  # tests: 12074

    lazy = pl.LazyFrame({"foo": [1, 1, -2, 3]})

    # After the select only "bar" remains, so "foo" is no longer available.
    with pytest.raises(ColumnNotFoundError):
        (
            lazy.select(pl.col("foo").cum_sum().alias("bar"))
            .filter(pl.col("bar") == pl.col("foo"))
            .explain()
        )
635
636
637
def test_error_list_to_array() -> None:
    """`list.to_array(2)` raises when a list does not have exactly two elements."""
    frame = pl.DataFrame(
        data={"a": [[1, 2], [3, 4, 5]]}, schema={"a": pl.List(pl.Int8)}
    )
    expected_msg = "not all elements have the specified width"
    with pytest.raises(ComputeError, match=expected_msg):
        frame.with_columns(array=pl.col("a").list.to_array(2))
642
643
644
def test_raise_not_found_in_simplify_14974() -> None:
    """An arithmetic expression on a missing column raises `ColumnNotFoundError`."""
    empty = pl.DataFrame()
    with pytest.raises(ColumnNotFoundError):
        empty.select(1 / (1 + pl.col("a")))
648
649
650
def test_invalid_product_type() -> None:
    """`product` on a list-valued Series raises `InvalidOperationError`."""
    nested = pl.Series([[1, 2, 3]])
    expected_msg = "`product` operation not supported for dtype"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        nested.product()
656
657
658
def test_fill_null_invalid_supertype() -> None:
    """Filling nulls in a Date column with a float raises."""
    frame = pl.DataFrame({"date": [date(2022, 1, 1), None]})
    expected_msg = "got invalid or ambiguous"
    with pytest.raises(InvalidOperationError, match=expected_msg):
        frame.select(pl.col("date").fill_null(1.0))
662
663
664
def test_raise_array_of_cats() -> None:
    """Building a Series with dtype `Array(Categorical, 2)` raises."""
    values = [["a", "b"], ["a", "c"]]
    with pytest.raises(InvalidOperationError, match="is not yet supported"):
        pl.Series(values, dtype=pl.Array(pl.Categorical, 2))
667
668
669
@pytest.mark.may_fail_cloud  # reason: Object type not supported
def test_raise_invalid_arithmetic() -> None:
    """Subtracting a column of Python objects from itself raises."""
    frame = pl.Series("a", [object()]).to_frame()
    difference = pl.col("a") - pl.col("a")

    with pytest.raises(InvalidOperationError):
        frame.select(difference)
675
676
677
def test_raise_on_sorted_multi_args() -> None:
    """Passing a list of columns to `set_sorted` raises `TypeError`."""
    frame = pl.DataFrame({"a": [1], "b": [1]})
    with pytest.raises(TypeError):
        frame.set_sorted(
            ["a", "b"]  # type: ignore[arg-type]
        )
682
683
684
def test_err_invalid_comparison() -> None:
    """Series comparisons between incompatible dtypes raise descriptive errors."""
    date_bool_msg = "could not evaluate comparison between series 'a' of dtype: date and series 'b' of dtype: bool"
    object_msg = "could not apply comparison on series of dtype 'object; operand names: 'a', 'b'"

    with pytest.raises(SchemaError, match=date_bool_msg):
        _ = pl.Series("a", [date(2020, 1, 1)]) == pl.Series("b", [True])

    with pytest.raises(InvalidOperationError, match=object_msg):
        _ = pl.Series("a", [object()]) == pl.Series("b", [object])
696
697
698
def test_no_panic_pandas_nat() -> None:
    """Constructing a frame from `pd.NaT` raises a regular Python exception."""
    # pd.NaT is deliberately unsupported; it only has to fail without a panic.
    nat_data = {"x": [pd.NaT]}
    with pytest.raises(Exception):  # noqa: B017
        pl.DataFrame(nat_data)
702
703
704
def test_list_to_struct_invalid_type() -> None:
    """`list.to_struct` on a Series built from a scalar raises."""
    field_names = ["a", "b"]
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.DataFrame({"a": 1}).to_series().list.to_struct(fields=field_names)
707
708
709
def test_raise_invalid_agg() -> None:
    """Filtering on a missing column inside `agg` raises `ColumnNotFoundError`."""
    lazy = pl.LazyFrame({"foo": [1]})
    with pytest.raises(pl.exceptions.ColumnNotFoundError):
        (
            lazy.with_row_index()
            .group_by("index")
            .agg(pl.col("foo").filter(pl.col("i_do_not_exist")))
        ).collect()
717
718
719
def test_err_mean_horizontal_lists() -> None:
    """`mean_horizontal` over list-valued columns raises."""
    readings = pl.DataFrame(
        {
            "experiment_id": [1, 2],
            "sensor1": [[1, 2, 3], [7, 8, 9]],
            "sensor2": [[4, 5, 6], [10, 11, 12]],
        }
    )
    with pytest.raises(pl.exceptions.InvalidOperationError):
        readings.with_columns(
            pl.mean_horizontal("sensor1", "sensor2").alias("avg_sensor")
        )
729
730
731
def test_raise_column_not_found_in_join_arg() -> None:
    """Joining on a column that exists only in the right frame raises."""
    left = pl.DataFrame({"x": [1, 2, 3]})
    right = pl.DataFrame({"y": [1, 2, 3]})
    with pytest.raises(pl.exceptions.ColumnNotFoundError):
        left.join(right, on="y")
736
737
738
def test_raise_on_different_results_20104() -> None:
    """A rolling `agg` with `gather_every` followed by `map_batches` raises `TypeError`."""
    frame = pl.DataFrame({"x": [1, 2]})

    with pytest.raises(TypeError):
        frame.rolling("x", period="3i").agg(
            result=pl.col("x")
            .gather_every(2, offset=1)
            .map_batches(pl.Series.min, return_dtype=pl.Float64)
        )
747
748
749
@pytest.mark.parametrize("fill_value", [None, -1])
def test_shift_with_null_deprecated_24105(fill_value: Any) -> None:
    """Shifting by a null `n` emits a deprecation warning and yields all nulls."""
    frame = pl.DataFrame({"x": [1, 2, 3]})
    expected_warning = r"shift value 'n' is null, which currently returns a column of null values. This will become an error in the future."

    df_shift = None
    with pytest.deprecated_call(match=expected_warning):  # @2.0
        # No value of x exceeds 3, so the shift amount is taken from an empty selection.
        df_shift = frame.select(
            pl.col.x.shift(pl.col.x.filter(pl.col.x > 3).first(), fill_value=fill_value)
        )

    # Check that the result is a column of nulls, even if the fill_value is different
    all_null = pl.DataFrame({"x": [None, None, None]})
    assert_frame_equal(
        df_shift,
        all_null,
        check_dtypes=False,
    )
765
766