GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/array/test_array.py

from __future__ import annotations

import datetime
from typing import Any

import pytest

import polars as pl
from polars.exceptions import ComputeError, InvalidOperationError
from polars.testing import assert_frame_equal, assert_series_equal


def test_arr_min_max() -> None:
    s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
    assert s.arr.max().to_list() == [2, 4]
    assert s.arr.min().to_list() == [1, 3]

    s_with_null = pl.Series("a", [[None, 2], None, [3, 4]], dtype=pl.Array(pl.Int64, 2))
    assert s_with_null.arr.max().to_list() == [2, None, 4]
    assert s_with_null.arr.min().to_list() == [2, None, 3]


def test_arr_mean_median_var_std() -> None:
    s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
    assert s.arr.mean().to_list() == [1.5, 3.5]
    assert s.arr.median().to_list() == [1.5, 3.5]
    assert s.arr.var().to_list() == [0.5, 0.5]
    assert round(s.arr.std().to_list()[0], 5) == 0.70711

    s_with_null = pl.Series("a", [[3, 4], None, [None, 2]], dtype=pl.Array(pl.Int64, 2))
    assert s_with_null.arr.mean().to_list() == [3.5, None, 2.0]
    assert s_with_null.arr.median().to_list() == [3.5, None, 2.0]
    assert s_with_null.arr.var().to_list() == [0.5, None, None]
    assert round(s_with_null.arr.std().to_list()[0], 5) == 0.70711


def test_array_min_max_dtype_12123() -> None:
    df = pl.LazyFrame(
        [pl.Series("a", [[1.0, 3.0], [2.0, 5.0]]), pl.Series("b", [1.0, 2.0])],
        schema_overrides={
            "a": pl.Array(pl.Float64, 2),
        },
    )

    df = df.with_columns(
        max=pl.col("a").arr.max().alias("max"),
        min=pl.col("a").arr.min().alias("min"),
    )

    assert df.collect_schema() == {
        "a": pl.Array(pl.Float64, 2),
        "b": pl.Float64,
        "max": pl.Float64,
        "min": pl.Float64,
    }

    out = df.select(pl.col("max") * pl.col("b"), pl.col("min") * pl.col("b")).collect()

    assert_frame_equal(out, pl.DataFrame({"max": [3.0, 10.0], "min": [1.0, 4.0]}))


@pytest.mark.parametrize(
    ("data", "expected_sum", "dtype"),
    [
        ([[1, 2], [4, 3]], [3, 7], pl.Int64),
        ([[1, None], [None, 3], [None, None]], [1, 3, 0], pl.Int64),
        ([[1.0, 2.0], [4.0, 3.0]], [3.0, 7.0], pl.Float32),
        ([[1.0, None], [None, 3.0], [None, None]], [1.0, 3.0, 0], pl.Float32),
        ([[True, False], [True, True], [False, False]], [1, 2, 0], pl.Boolean),
        ([[True, None], [None, False], [None, None]], [1, 0, 0], pl.Boolean),
    ],
)
def test_arr_sum(
    data: list[list[Any]], expected_sum: list[Any], dtype: pl.DataType
) -> None:
    s = pl.Series("a", data, dtype=pl.Array(dtype, 2))
    assert s.arr.sum().to_list() == expected_sum


@pytest.mark.may_fail_cloud
def test_array_lengths_zwa() -> None:
    assert pl.Series("a", [[], []], pl.Array(pl.Null, 0)).arr.len().to_list() == [0, 0]
    assert pl.Series("a", [None, []], pl.Array(pl.Null, 0)).arr.len().to_list() == [
        None,
        0,
    ]
    assert pl.Series("a", [None], pl.Array(pl.Null, 0)).arr.len().to_list() == [None]

    assert pl.Series("a", [], pl.Array(pl.Null, 0)).arr.len().to_list() == []


def test_array_lengths() -> None:
    df = pl.DataFrame(
        [
            pl.Series("a", [[1, 2, 3]], dtype=pl.Array(pl.Int64, 3)),
            pl.Series("b", [[4, 5]], dtype=pl.Array(pl.Int64, 2)),
        ]
    )
    out = df.select(pl.col("a").arr.len(), pl.col("b").arr.len())
    expected_df = pl.DataFrame(
        {"a": [3], "b": [2]},
        schema={"a": pl.get_index_type(), "b": pl.get_index_type()},
    )
    assert_frame_equal(out, expected_df)

    assert pl.Series("a", [], pl.Array(pl.Null, 1)).arr.len().to_list() == []
    assert pl.Series(
        "a", [[1, 2, 3], None, [7, 8, 9]], pl.Array(pl.get_index_type(), 3)
    ).arr.len().to_list() == [3, None, 3]


@pytest.mark.parametrize(
    ("as_array"),
    [True, False],
)
def test_arr_slice(as_array: bool) -> None:
    df = pl.DataFrame(
        {
            "arr": [[1, 2, 3], [10, 2, 1]],
        },
        schema={"arr": pl.Array(pl.Int64, 3)},
    )

    assert df.select([pl.col("arr").arr.slice(0, 1, as_array=as_array)]).to_dict(
        as_series=False
    ) == {"arr": [[1], [10]]}
    assert df.select([pl.col("arr").arr.slice(1, 1, as_array=as_array)]).to_dict(
        as_series=False
    ) == {"arr": [[2], [2]]}
    assert df.select([pl.col("arr").arr.slice(-1, 1, as_array=as_array)]).to_dict(
        as_series=False
    ) == {"arr": [[3], [1]]}
    assert df.select([pl.col("arr").arr.slice(-2, 1, as_array=as_array)]).to_dict(
        as_series=False
    ) == {"arr": [[2], [2]]}
    assert df.select([pl.col("arr").arr.slice(-2, 2, as_array=as_array)]).to_dict(
        as_series=False
    ) == {"arr": [[2, 3], [2, 1]]}
    return


@pytest.mark.parametrize(
    ("as_array"),
    [True, False],
)
def test_arr_slice_on_series(as_array: bool) -> None:
    vals = [[1, 2, 3, 4], [10, 2, 1, 2]]
    s = pl.Series("a", vals, dtype=pl.Array(pl.Int64, 4))
    assert s.arr.head(2, as_array=as_array).to_list() == [[1, 2], [10, 2]]
    assert s.arr.tail(2, as_array=as_array).to_list() == [[3, 4], [1, 2]]
    assert s.arr.tail(10, as_array=as_array).to_list() == vals
    assert s.arr.head(10, as_array=as_array).to_list() == vals
    assert s.arr.slice(1, 2, as_array=as_array).to_list() == [[2, 3], [2, 1]]
    assert s.arr.slice(-5, 2, as_array=as_array).to_list() == [[1], [10]]
    # TODO: there is a bug in list.slice that does not allow negative values for head
    if as_array:
        assert s.arr.tail(-1, as_array=as_array).to_list() == [[2, 3, 4], [2, 1, 2]]
        assert s.arr.tail(-2, as_array=as_array).to_list() == [[3, 4], [1, 2]]
        assert s.arr.tail(-3, as_array=as_array).to_list() == [[4], [2]]
        assert s.arr.head(-1, as_array=as_array).to_list() == [[1, 2, 3], [10, 2, 1]]
        assert s.arr.head(-2, as_array=as_array).to_list() == [[1, 2], [10, 2]]
        assert s.arr.head(-3, as_array=as_array).to_list() == [[1], [10]]


def test_arr_unique() -> None:
    df = pl.DataFrame(
        {"a": pl.Series("a", [[1, 1], [4, 3]], dtype=pl.Array(pl.Int64, 2))}
    )

    out = df.select(pl.col("a").arr.unique(maintain_order=True))
    expected = pl.DataFrame({"a": [[1], [4, 3]]})
    assert_frame_equal(out, expected)


def test_array_any_all() -> None:
    s = pl.Series(
        [[True, True], [False, True], [False, False], [None, None], None],
        dtype=pl.Array(pl.Boolean, 2),
    )

    expected_any = pl.Series([True, True, False, False, None])
    assert_series_equal(s.arr.any(), expected_any)

    expected_all = pl.Series([True, False, False, True, None])
    assert_series_equal(s.arr.all(), expected_all)

    s = pl.Series([[1, 2], [3, 4], [5, 6]], dtype=pl.Array(pl.Int64, 2))
    with pytest.raises(ComputeError, match="expected boolean elements in array"):
        s.arr.any()
    with pytest.raises(ComputeError, match="expected boolean elements in array"):
        s.arr.all()


def test_array_sort() -> None:
    s = pl.Series([[2, None, 1], [1, 3, 2]], dtype=pl.Array(pl.UInt32, 3))

    desc = s.arr.sort(descending=True)
    expected = pl.Series([[None, 2, 1], [3, 2, 1]], dtype=pl.Array(pl.UInt32, 3))
    assert_series_equal(desc, expected)

    asc = s.arr.sort(descending=False)
    expected = pl.Series([[None, 1, 2], [1, 2, 3]], dtype=pl.Array(pl.UInt32, 3))
    assert_series_equal(asc, expected)

    # test nulls_last
    s = pl.Series([[None, 1, 2], [-1, None, 9]], dtype=pl.Array(pl.Int8, 3))
    assert_series_equal(
        s.arr.sort(nulls_last=True),
        pl.Series([[1, 2, None], [-1, 9, None]], dtype=pl.Array(pl.Int8, 3)),
    )
    assert_series_equal(
        s.arr.sort(nulls_last=False),
        pl.Series([[None, 1, 2], [None, -1, 9]], dtype=pl.Array(pl.Int8, 3)),
    )


def test_array_reverse() -> None:
    s = pl.Series([[2, None, 1], [1, None, 2]], dtype=pl.Array(pl.UInt32, 3))

    s = s.arr.reverse()
    expected = pl.Series([[1, None, 2], [2, None, 1]], dtype=pl.Array(pl.UInt32, 3))
    assert_series_equal(s, expected)


def test_array_arg_min_max() -> None:
    s = pl.Series("a", [[1, 2, 4], [3, 2, 1]], dtype=pl.Array(pl.UInt32, 3))
    expected = pl.Series("a", [0, 2], dtype=pl.get_index_type())
    assert_series_equal(s.arr.arg_min(), expected)
    expected = pl.Series("a", [2, 0], dtype=pl.get_index_type())
    assert_series_equal(s.arr.arg_max(), expected)


def test_array_get() -> None:
    s = pl.Series(
        "a",
        [[1, 2, 3, 4], [5, 6, None, None], [7, 8, 9, 10]],
        dtype=pl.Array(pl.Int64, 4),
    )

    # Test index literal.
    out = s.arr.get(1, null_on_oob=False)
    expected = pl.Series("a", [2, 6, 8], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Null index literal.
    out_df = s.to_frame().select(pl.col.a.arr.get(pl.lit(None), null_on_oob=False))
    expected_df = pl.Series("a", [None, None, None], dtype=pl.Int64).to_frame()
    assert_frame_equal(out_df, expected_df)

    # Out-of-bounds index literal.
    with pytest.raises(ComputeError, match="get index is out of bounds"):
        out = s.arr.get(100, null_on_oob=False)

    # Negative index literal.
    out = s.arr.get(-2, null_on_oob=False)
    expected = pl.Series("a", [3, None, 9], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Test index expr.
    with pytest.raises(ComputeError, match="get index is out of bounds"):
        out = s.arr.get(pl.Series([1, -2, 100]), null_on_oob=False)

    out = s.arr.get(pl.Series([1, -2, 0]), null_on_oob=False)
    expected = pl.Series("a", [2, None, 7], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Test logical type.
    s = pl.Series(
        "a",
        [
            [datetime.date(1999, 1, 1), datetime.date(2000, 1, 1)],
            [datetime.date(2001, 10, 1), None],
            [None, None],
        ],
        dtype=pl.Array(pl.Date, 2),
    )
    with pytest.raises(ComputeError, match="get index is out of bounds"):
        out = s.arr.get(pl.Series([1, -2, 4]), null_on_oob=False)


def test_array_get_null_on_oob() -> None:
    s = pl.Series(
        "a",
        [[1, 2, 3, 4], [5, 6, None, None], [7, 8, 9, 10]],
        dtype=pl.Array(pl.Int64, 4),
    )

    # Test index literal.
    out = s.arr.get(1, null_on_oob=True)
    expected = pl.Series("a", [2, 6, 8], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Null index literal.
    out_df = s.to_frame().select(pl.col.a.arr.get(pl.lit(None), null_on_oob=True))
    expected_df = pl.Series("a", [None, None, None], dtype=pl.Int64).to_frame()
    assert_frame_equal(out_df, expected_df)

    # Out-of-bounds index literal.
    out = s.arr.get(100, null_on_oob=True)
    expected = pl.Series("a", [None, None, None], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Negative index literal.
    out = s.arr.get(-2, null_on_oob=True)
    expected = pl.Series("a", [3, None, 9], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Test index expr.
    out = s.arr.get(pl.Series([1, -2, 100]), null_on_oob=True)
    expected = pl.Series("a", [2, None, None], dtype=pl.Int64)
    assert_series_equal(out, expected)

    # Test logical type.
    s = pl.Series(
        "a",
        [
            [datetime.date(1999, 1, 1), datetime.date(2000, 1, 1)],
            [datetime.date(2001, 10, 1), None],
            [None, None],
        ],
        dtype=pl.Array(pl.Date, 2),
    )
    out = s.arr.get(pl.Series([1, -2, 4]), null_on_oob=True)
    expected = pl.Series(
        "a",
        [datetime.date(2000, 1, 1), datetime.date(2001, 10, 1), None],
        dtype=pl.Date,
    )
    assert_series_equal(out, expected)


def test_arr_first_last() -> None:
    s = pl.Series(
        "a",
        [[1, 2, 3], [None, 5, 6], [None, None, None]],
        dtype=pl.Array(pl.Int64, 3),
    )

    first = s.arr.first()
    expected_first = pl.Series(
        "a",
        [1, None, None],
        dtype=pl.Int64,
    )
    assert_series_equal(first, expected_first)

    last = s.arr.last()
    expected_last = pl.Series(
        "a",
        [3, 6, None],
        dtype=pl.Int64,
    )
    assert_series_equal(last, expected_last)


@pytest.mark.parametrize(
    ("data", "set", "dtype"),
    [
        ([1, 2], [[1, 2], [3, 4]], pl.Int64),
        ([True, False], [[True, False], [True, True]], pl.Boolean),
        (["a", "b"], [["a", "b"], ["c", "d"]], pl.String),
        ([b"a", b"b"], [[b"a", b"b"], [b"c", b"d"]], pl.Binary),
        (
            [{"a": 1}, {"a": 2}],
            [[{"a": 1}, {"a": 2}], [{"b": 1}, {"a": 3}]],
            pl.Struct([pl.Field("a", pl.Int64)]),
        ),
    ],
)
def test_is_in_array(data: list[Any], set: list[list[Any]], dtype: pl.DataType) -> None:
    df = pl.DataFrame(
        {"a": data, "arr": set},
        schema={"a": dtype, "arr": pl.Array(dtype, 2)},
    )
    out = df.select(is_in=pl.col("a").is_in(pl.col("arr"))).to_series()
    expected = pl.Series("is_in", [True, False])
    assert_series_equal(out, expected)


def test_array_join() -> None:
    df = pl.DataFrame(
        {
            "a": [["ab", "c", "d"], ["e", "f", "g"], [None, None, None], None],
            "separator": ["&", None, "*", "_"],
        },
        schema={
            "a": pl.Array(pl.String, 3),
            "separator": pl.String,
        },
    )
    out = df.select(pl.col("a").arr.join("-"))
    assert out.to_dict(as_series=False) == {"a": ["ab-c-d", "e-f-g", "", None]}
    out = df.select(pl.col("a").arr.join(pl.col("separator")))
    assert out.to_dict(as_series=False) == {"a": ["ab&c&d", None, "", None]}

    # test ignore_nulls argument
    df = pl.DataFrame(
        {
            "a": [
                ["a", None, "b", None],
                None,
                [None, None, None, None],
                ["c", "d", "e", "f"],
            ],
            "separator": ["-", "&", " ", "@"],
        },
        schema={
            "a": pl.Array(pl.String, 4),
            "separator": pl.String,
        },
    )
    # ignore nulls
    out = df.select(pl.col("a").arr.join("-", ignore_nulls=True))
    assert out.to_dict(as_series=False) == {"a": ["a-b", None, "", "c-d-e-f"]}
    out = df.select(pl.col("a").arr.join(pl.col("separator"), ignore_nulls=True))
    assert out.to_dict(as_series=False) == {"a": ["a-b", None, "", "c@d@e@f"]}
    # propagate nulls
    out = df.select(pl.col("a").arr.join("-", ignore_nulls=False))
    assert out.to_dict(as_series=False) == {"a": [None, None, None, "c-d-e-f"]}
    out = df.select(pl.col("a").arr.join(pl.col("separator"), ignore_nulls=False))
    assert out.to_dict(as_series=False) == {"a": [None, None, None, "c@d@e@f"]}


def test_array_explode() -> None:
    df = pl.DataFrame(
        {
            "str": [["a", "b"], ["c", None], None],
            "nested": [[[1, 2], [3]], [[], [4, None]], None],
            "logical": [
                [datetime.date(1998, 1, 1), datetime.date(2000, 10, 1)],
                [datetime.date(2024, 1, 1), None],
                None,
            ],
        },
        schema={
            "str": pl.Array(pl.String, 2),
            "nested": pl.Array(pl.List(pl.Int64), 2),
            "logical": pl.Array(pl.Date, 2),
        },
    )
    out = df.select(pl.all().arr.explode())
    expected = pl.DataFrame(
        {
            "str": ["a", "b", "c", None, None],
            "nested": [[1, 2], [3], [], [4, None], None],
            "logical": [
                datetime.date(1998, 1, 1),
                datetime.date(2000, 10, 1),
                datetime.date(2024, 1, 1),
                None,
                None,
            ],
        }
    )
    assert_frame_equal(out, expected)

    # test no-null fast path
    s = pl.Series(
        [
            [datetime.date(1998, 1, 1), datetime.date(1999, 1, 3)],
            [datetime.date(2000, 1, 1), datetime.date(2023, 10, 1)],
        ],
        dtype=pl.Array(pl.Date, 2),
    )
    out_s = s.arr.explode()
    expected_s = pl.Series(
        [
            datetime.date(1998, 1, 1),
            datetime.date(1999, 1, 3),
            datetime.date(2000, 1, 1),
            datetime.date(2023, 10, 1),
        ],
        dtype=pl.Date,
    )
    assert_series_equal(out_s, expected_s)


@pytest.mark.parametrize(
    ("arr", "data", "expected", "dtype"),
    [
        ([[1, 2], [3, None], None], 1, [1, 0, None], pl.Int64),
        ([[True, False], [True, None], None], True, [1, 1, None], pl.Boolean),
        ([["a", "b"], ["c", None], None], "a", [1, 0, None], pl.String),
        ([[b"a", b"b"], [b"c", None], None], b"a", [1, 0, None], pl.Binary),
    ],
)
def test_array_count_matches(
    arr: list[list[Any] | None], data: Any, expected: list[Any], dtype: pl.DataType
) -> None:
    df = pl.DataFrame({"arr": arr}, schema={"arr": pl.Array(dtype, 2)})
    out = df.select(count_matches=pl.col("arr").arr.count_matches(data))
    assert out.to_dict(as_series=False) == {"count_matches": expected}


def test_array_count_matches_wildcard_expansion() -> None:
    df = pl.DataFrame(
        {"a": [[1, 2]], "b": [[3, 4]]},
        schema={"a": pl.Array(pl.Int64, 2), "b": pl.Array(pl.Int64, 2)},
    )
    assert df.select(pl.all().arr.count_matches(3)).to_dict(as_series=False) == {
        "a": [0],
        "b": [1],
    }


def test_array_to_struct() -> None:
    df = pl.DataFrame(
        {"a": [[1, 2, 3], [4, 5, None]]}, schema={"a": pl.Array(pl.Int8, 3)}
    )
    assert df.select([pl.col("a").arr.to_struct()]).to_series().to_list() == [
        {"field_0": 1, "field_1": 2, "field_2": 3},
        {"field_0": 4, "field_1": 5, "field_2": None},
    ]

    df = pl.DataFrame(
        {"a": [[1, 2, None], [1, 2, 3]]}, schema={"a": pl.Array(pl.Int8, 3)}
    )
    assert df.select(
        pl.col("a").arr.to_struct(fields=lambda idx: f"col_name_{idx}")
    ).to_series().to_list() == [
        {"col_name_0": 1, "col_name_1": 2, "col_name_2": None},
        {"col_name_0": 1, "col_name_1": 2, "col_name_2": 3},
    ]

    assert df.lazy().select(pl.col("a").arr.to_struct()).unnest(
        "a"
    ).sum().collect().columns == ["field_0", "field_1", "field_2"]


def test_array_shift() -> None:
    df = pl.DataFrame(
        {"a": [[1, 2, 3], None, [4, 5, 6], [7, 8, 9]], "n": [None, 1, 1, -2]},
        schema={"a": pl.Array(pl.Int64, 3), "n": pl.Int64},
    )

    out = df.select(
        lit=pl.col("a").arr.shift(1), expr=pl.col("a").arr.shift(pl.col("n"))
    )
    expected = pl.DataFrame(
        {
            "lit": [[None, 1, 2], None, [None, 4, 5], [None, 7, 8]],
            "expr": [None, None, [None, 4, 5], [9, None, None]],
        },
        schema={"lit": pl.Array(pl.Int64, 3), "expr": pl.Array(pl.Int64, 3)},
    )
    assert_frame_equal(out, expected)


def test_array_n_unique() -> None:
    df = pl.DataFrame(
        {
            "a": [[1, 1, 2], [3, 3, 3], [None, None, None], None],
        },
        schema={"a": pl.Array(pl.Int64, 3)},
    )

    out = df.select(n_unique=pl.col("a").arr.n_unique())
    expected = pl.DataFrame(
        {"n_unique": [2, 1, 1, None]}, schema={"n_unique": pl.get_index_type()}
    )
    assert_frame_equal(out, expected)


def test_explode_19049() -> None:
    df = pl.DataFrame({"a": [[1, 2, 3]]}, schema={"a": pl.Array(pl.Int64, 3)})
    result_df = df.select(pl.col.a.arr.explode())
    expected_df = pl.DataFrame({"a": [1, 2, 3]}, schema={"a": pl.Int64})
    assert_frame_equal(result_df, expected_df)

    df = pl.DataFrame({"a": [1, 2, 3]}, schema={"a": pl.Int64})
    with pytest.raises(
        InvalidOperationError,
        match="expected Array datatype for array operation, got: Int64",
    ):
        df.select(pl.col.a.arr.explode())


def test_array_join_unequal_lengths_22018() -> None:
    df = pl.DataFrame(
        [
            pl.Series(
                "a",
                [
                    ["a", "b", "d"],
                    ["ya", "x", "y"],
                    ["ya", "x", "y"],
                ],
                pl.Array(pl.String, 3),
            ),
        ]
    )
    with pytest.raises(pl.exceptions.ShapeError):
        df.select(pl.col.a.arr.join(pl.Series([",", "-"])))


def test_array_shift_unequal_lengths_22018() -> None:
    with pytest.raises(pl.exceptions.ShapeError):
        pl.Series(
            "a",
            [
                ["a", "b", "d"],
                ["a", "b", "d"],
                ["a", "b", "d"],
            ],
            pl.Array(pl.String, 3),
        ).arr.shift(pl.Series([1, 2]))


def test_array_shift_self_broadcast_22124() -> None:
    assert_series_equal(
        pl.Series(
            "a",
            [
                ["a", "b", "d"],
            ],
            pl.Array(pl.String, 3),
        ).arr.shift(pl.Series([1, 2])),
        pl.Series(
            "a",
            [
                [None, "a", "b"],
                [None, None, "a"],
            ],
            pl.Array(pl.String, 3),
        ),
    )


def test_arr_contains() -> None:
    s = pl.Series([[1, 2, None], [None, None, None], None], dtype=pl.Array(pl.Int64, 3))

    assert_series_equal(
        s.arr.contains(None, nulls_equal=False),
        pl.Series([None, None, None], dtype=pl.Boolean),
    )
    assert_series_equal(
        s.arr.contains(None, nulls_equal=True),
        pl.Series([True, True, None], dtype=pl.Boolean),
    )
    assert_series_equal(
        s.arr.contains(1, nulls_equal=False),
        pl.Series([True, False, None], dtype=pl.Boolean),
    )
    assert_series_equal(
        s.arr.contains(1, nulls_equal=True),
        pl.Series([True, False, None], dtype=pl.Boolean),
    )


@pytest.mark.parametrize(
    "expr",
    [
        pl.col("a").arr.contains("z"),
        pl.col("a").arr.explode(),
        pl.col("a").arr.sum(),
        pl.col("a").arr.to_list(),
        pl.col("a").arr.to_struct(),
        pl.col("a").arr.unique(),
        pl.col("a").arr.all(),
        pl.col("a").arr.any(),
        pl.col("a").arr.arg_max(),
        pl.col("a").arr.arg_min(),
        pl.col("a").arr.count_matches("z"),
        pl.col("a").arr.first(),
        pl.col("a").arr.get(0),
        pl.col("a").arr.join(""),
        pl.col("a").arr.last(),
        pl.col("a").arr.len(),
        pl.col("a").arr.max(),
        pl.col("a").arr.mean(),
        pl.col("a").arr.median(),
        pl.col("a").arr.min(),
        pl.col("a").arr.n_unique(),
        pl.col("a").arr.reverse(),
        pl.col("a").arr.shift(1),
        pl.col("a").arr.sort(),
        pl.col("a").arr.std(),
        pl.col("a").arr.var(),
    ],
)
def test_schema_non_array(expr: pl.Expr) -> None:
    lf = pl.LazyFrame({"a": ["a", "b", "c"]})

    with pytest.raises(
        InvalidOperationError,
        match="expected Array datatype for array operation, got: String",
    ):
        lf.select(expr).collect_schema()


def test_array_get_broadcast_26217() -> None:
    df = pl.DataFrame({"idx": [0, 1, 2, 1, 2, 0, 1]})
    out = df.select(pl.lit([42, 13, 37], pl.Array(pl.UInt8, 3)).arr.get(pl.col.idx))
    expected = pl.DataFrame(
        {"literal": [42, 13, 37, 13, 37, 42, 13]}, schema={"literal": pl.UInt8}
    )
    assert_frame_equal(out, expected)