Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/expr/test_binary.py
7884 views
1
from __future__ import annotations
2
3
import polars as pl
4
from polars.testing import assert_frame_equal
5
6
7
def test_binary_slice_basic() -> None:
    """Check ``bin.slice`` with a positive offset and an explicit length.

    The expected frame shows that a row with fewer bytes than requested
    yields whatever remains after the offset, and that a null row stays null.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd\xfc\xfb",
        b"\x10\x20\x30",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    slice_expr = pl.col("data").bin.slice(1, 3).alias("sliced")
    actual = frame.select(slice_expr)

    wanted = pl.DataFrame(
        {
            "sliced": [
                b"\x01\x02\x03",
                b"\xfe\xfd\xfc",
                b"\x20\x30",  # only two bytes are left after offset 1
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
32
33
34
def test_binary_slice_negative_offset() -> None:
    """Check ``bin.slice`` when the offset is negative.

    The expected values correspond to an offset counted back from the end of
    each value, followed by taking ``length`` bytes forward.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd\xfc\xfb",
        b"\x10\x20\x30",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    slice_expr = pl.col("data").bin.slice(-3, 2).alias("sliced")
    actual = frame.select(slice_expr)

    wanted = pl.DataFrame(
        {
            "sliced": [
                b"\x02\x03",
                b"\xfd\xfc",
                b"\x10\x20",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
59
60
61
def test_binary_slice_to_end() -> None:
    """Check ``bin.slice`` called with only an offset (no length argument).

    The expected frame contains every byte from the offset to the end of
    each value.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd\xfc\xfb",
        b"\x10\x20\x30",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    slice_expr = pl.col("data").bin.slice(2).alias("sliced")
    actual = frame.select(slice_expr)

    wanted = pl.DataFrame(
        {
            "sliced": [
                b"\x02\x03\x04",
                b"\xfd\xfc\xfb",
                b"\x30",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
86
87
88
def test_binary_slice_with_expression() -> None:
    """Check ``bin.slice`` when the offset comes from another column.

    Each row is sliced with its own offset from the ``offset`` column and a
    fixed length of 2.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd\xfc\xfb",
        b"\x10\x20\x30",
        None,
    ]
    frame = pl.DataFrame(
        {
            "data": payloads,
            "offset": [0, 1, 2, 0],
        }
    )

    per_row_offset = pl.col("offset")
    slice_expr = pl.col("data").bin.slice(per_row_offset, 2).alias("sliced")
    actual = frame.select(slice_expr)

    wanted = pl.DataFrame(
        {
            "sliced": [
                b"\x00\x01",
                b"\xfe\xfd",
                b"\x30",  # offset 2 leaves a single byte
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
114
115
116
def test_binary_slice_zero_length() -> None:
    """Check that ``bin.slice`` with length 0 is expected to give empty bytes."""
    frame = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04"]})

    slice_expr = pl.col("data").bin.slice(1, 0).alias("sliced")
    actual = frame.select(slice_expr)

    wanted = pl.DataFrame({"sliced": [b""]})
    assert_frame_equal(actual, wanted)
123
124
125
def test_binary_slice_out_of_bounds() -> None:
    """Check ``bin.slice`` when offset or length exceed the value's size.

    Two cases are asserted in order: an offset past the end (expected to give
    empty bytes) and a length longer than the remaining data (expected to be
    clamped to what is available).
    """
    frame = pl.DataFrame({"data": [b"\x00\x01\x02"]})
    data = pl.col("data")

    # Case 1: the offset lies beyond the end of the value.
    past_end = frame.select(data.bin.slice(10, 2).alias("sliced"))
    wanted_past_end = pl.DataFrame({"sliced": [b""]})
    assert_frame_equal(past_end, wanted_past_end)

    # Case 2: the requested length runs past the available bytes.
    over_long = frame.select(data.bin.slice(1, 100).alias("sliced"))
    wanted_over_long = pl.DataFrame({"sliced": [b"\x01\x02"]})
    assert_frame_equal(over_long, wanted_over_long)
138
139
140
def test_binary_head_basic() -> None:
    """Check ``bin.head`` with a positive ``n``.

    The expected frame holds the first three bytes of each value, or the
    whole value when it is no longer than three bytes; nulls stay null.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    head_expr = pl.col("data").bin.head(3).alias("head")
    actual = frame.select(head_expr)

    wanted = pl.DataFrame(
        {
            "head": [
                b"\x00\x01\x02",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
165
166
167
def test_binary_head_larger_than_data() -> None:
    """Check ``bin.head`` when ``n`` exceeds every value's length.

    The expected output equals the input values, under the ``head`` name.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    head_expr = pl.col("data").bin.head(10).alias("head")
    actual = frame.select(head_expr)

    wanted = pl.DataFrame(
        {
            "head": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
192
193
194
def test_binary_head_negative() -> None:
    """Check ``bin.head`` with a negative ``n`` (everything but the last ``|n|``).

    A value with no more than ``|n|`` bytes is expected to become empty bytes.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    head_expr = pl.col("data").bin.head(-2).alias("head")
    actual = frame.select(head_expr)

    wanted = pl.DataFrame(
        {
            "head": [
                b"\x00\x01\x02",
                b"\xff",
                b"",  # one byte minus the last two leaves nothing
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
219
220
221
def test_binary_head_zero() -> None:
    """Check ``bin.head`` with ``n=0``.

    Every non-null value is expected to become empty bytes; null stays null.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    head_expr = pl.col("data").bin.head(0).alias("head")
    actual = frame.select(head_expr)

    wanted = pl.DataFrame({"head": [b"", b"", b"", None]})
    assert_frame_equal(actual, wanted)
237
238
239
def test_binary_head_with_expression() -> None:
    """Check ``bin.head`` when ``n`` is taken per row from another column."""
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame(
        {
            "data": payloads,
            "n": [2, 1, 1, 0],
        }
    )

    per_row_n = pl.col("n")
    head_expr = pl.col("data").bin.head(per_row_n).alias("head")
    actual = frame.select(head_expr)

    wanted = pl.DataFrame(
        {
            "head": [
                b"\x00\x01",
                b"\xff",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
265
266
267
def test_binary_head_default() -> None:
    """Check ``bin.head`` called without arguments.

    The expected result is the first five bytes of a ten-byte value.
    """
    frame = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    head_expr = pl.col("data").bin.head().alias("head")
    actual = frame.select(head_expr)

    wanted = pl.DataFrame({"head": [b"\x00\x01\x02\x03\x04"]})
    assert_frame_equal(actual, wanted)
274
275
276
def test_binary_tail_basic() -> None:
    """Check ``bin.tail`` with a positive ``n``.

    The expected frame holds the last three bytes of each value, or the
    whole value when it is no longer than three bytes; nulls stay null.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    tail_expr = pl.col("data").bin.tail(3).alias("tail")
    actual = frame.select(tail_expr)

    wanted = pl.DataFrame(
        {
            "tail": [
                b"\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
301
302
303
def test_binary_tail_larger_than_data() -> None:
    """Check ``bin.tail`` when ``n`` exceeds every value's length.

    The expected output equals the input values, under the ``tail`` name.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    tail_expr = pl.col("data").bin.tail(10).alias("tail")
    actual = frame.select(tail_expr)

    wanted = pl.DataFrame(
        {
            "tail": [
                b"\x00\x01\x02\x03\x04",
                b"\xff\xfe\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
328
329
330
def test_binary_tail_negative() -> None:
    """Check ``bin.tail`` with a negative ``n`` (everything but the first ``|n|``).

    A value with no more than ``|n|`` bytes is expected to become empty bytes.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    tail_expr = pl.col("data").bin.tail(-2).alias("tail")
    actual = frame.select(tail_expr)

    wanted = pl.DataFrame(
        {
            "tail": [
                b"\x02\x03\x04",
                b"\xfd",
                b"",  # one byte minus the first two leaves nothing
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
355
356
357
def test_binary_tail_zero() -> None:
    """Check ``bin.tail`` with ``n=0``.

    Every non-null value is expected to become empty bytes; null stays null.
    """
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame({"data": payloads})

    tail_expr = pl.col("data").bin.tail(0).alias("tail")
    actual = frame.select(tail_expr)

    wanted = pl.DataFrame({"tail": [b"", b"", b"", None]})
    assert_frame_equal(actual, wanted)
373
374
375
def test_binary_tail_with_expression() -> None:
    """Check ``bin.tail`` when ``n`` is taken per row from another column."""
    payloads = [
        b"\x00\x01\x02\x03\x04",
        b"\xff\xfe\xfd",
        b"\x10",
        None,
    ]
    frame = pl.DataFrame(
        {
            "data": payloads,
            "n": [2, 1, 1, 0],
        }
    )

    per_row_n = pl.col("n")
    tail_expr = pl.col("data").bin.tail(per_row_n).alias("tail")
    actual = frame.select(tail_expr)

    wanted = pl.DataFrame(
        {
            "tail": [
                b"\x03\x04",
                b"\xfd",
                b"\x10",
                None,
            ]
        }
    )
    assert_frame_equal(actual, wanted)
401
402
403
def test_binary_tail_default() -> None:
    """Check ``bin.tail`` called without arguments.

    The expected result is the last five bytes of a ten-byte value.
    """
    frame = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    tail_expr = pl.col("data").bin.tail().alias("tail")
    actual = frame.select(tail_expr)

    wanted = pl.DataFrame({"tail": [b"\x05\x06\x07\x08\x09"]})
    assert_frame_equal(actual, wanted)
410
411
412
def test_binary_head_then_tail() -> None:
    """Check chaining ``bin.head`` into ``bin.tail`` to extract a middle span."""
    frame = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    # First keep bytes 0..7, then keep the last six of those.
    first_eight = pl.col("data").bin.head(8)
    middle_expr = first_eight.bin.tail(6).alias("middle")
    actual = frame.select(middle_expr)

    wanted = pl.DataFrame({"middle": [b"\x02\x03\x04\x05\x06\x07"]})
    assert_frame_equal(actual, wanted)
419
420
421
def test_binary_slice_then_head() -> None:
    """Check chaining ``bin.slice`` into ``bin.head``."""
    frame = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    # Take six bytes starting at offset 2, then the first three of those.
    window = pl.col("data").bin.slice(2, 6)
    combo_expr = window.bin.head(3).alias("combo")
    actual = frame.select(combo_expr)

    wanted = pl.DataFrame({"combo": [b"\x02\x03\x04"]})
    assert_frame_equal(actual, wanted)
428
429
430
def test_binary_tail_then_slice() -> None:
    """Check chaining ``bin.tail`` into ``bin.slice``."""
    frame = pl.DataFrame({"data": [b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"]})

    # Keep the last seven bytes, then take four bytes starting at offset 1.
    last_seven = pl.col("data").bin.tail(7)
    combo_expr = last_seven.bin.slice(1, 4).alias("combo")
    actual = frame.select(combo_expr)

    wanted = pl.DataFrame({"combo": [b"\x04\x05\x06\x07"]})
    assert_frame_equal(actual, wanted)
437
438
439
def test_binary_empty() -> None:
    """Check slice, head and tail on a zero-length binary value.

    Each operation is expected to return empty bytes in a column still
    named ``data``.
    """
    frame = pl.DataFrame({"data": [b""]})
    data = pl.col("data")

    sliced = frame.select(data.bin.slice(0, 5))
    assert_frame_equal(sliced, pl.DataFrame({"data": [b""]}))

    headed = frame.select(data.bin.head(5))
    assert_frame_equal(headed, pl.DataFrame({"data": [b""]}))

    tailed = frame.select(data.bin.tail(5))
    assert_frame_equal(tailed, pl.DataFrame({"data": [b""]}))
452
453
454
def test_binary_all_nulls() -> None:
    """Check slice, head and tail on a Binary column that contains only nulls.

    Each operation is expected to return a frame equal to the input.
    """
    null_frame = pl.DataFrame(
        {"data": [None, None, None]},
        schema={"data": pl.Binary},
    )
    data = pl.col("data")

    sliced = null_frame.select(data.bin.slice(0, 2))
    assert_frame_equal(sliced, null_frame)

    headed = null_frame.select(data.bin.head(2))
    assert_frame_equal(headed, null_frame)

    tailed = null_frame.select(data.bin.tail(2))
    assert_frame_equal(tailed, null_frame)
461
462
463
def test_binary_single_byte() -> None:
    """Check slice, head and tail on a one-byte binary value.

    Taking one byte with any of the three operations is expected to return
    the input unchanged; a zero-length slice is expected to give empty bytes.
    """
    one_byte = pl.DataFrame({"data": [b"\xff"]})
    data = pl.col("data")

    sliced = one_byte.select(data.bin.slice(0, 1))
    assert_frame_equal(sliced, one_byte)

    headed = one_byte.select(data.bin.head(1))
    assert_frame_equal(headed, one_byte)

    tailed = one_byte.select(data.bin.tail(1))
    assert_frame_equal(tailed, one_byte)

    nothing = one_byte.select(data.bin.slice(0, 0))
    assert_frame_equal(nothing, pl.DataFrame({"data": [b""]}))
473
474