Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/range/test_linear_space.py
6939 views
1
from __future__ import annotations
2
3
import re
4
from datetime import date, datetime
5
from typing import TYPE_CHECKING, Any
6
7
import numpy as np
8
import pytest
9
10
import polars as pl
11
from polars.exceptions import ComputeError, InvalidOperationError, ShapeError
12
from polars.testing import assert_frame_equal, assert_series_equal
13
14
if TYPE_CHECKING:
15
from polars import Expr
16
from polars._typing import ClosedInterval, PolarsDataType
17
18
19
@pytest.mark.parametrize(
    ("start", "end"),
    [
        (0, 0),
        (0, 1),
        (-1, 0),
        (-2.1, 3.4),
    ],
)
@pytest.mark.parametrize("num_samples", [0, 1, 2, 5, 1_000])
@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])
@pytest.mark.parametrize("eager", [True, False])
def test_linear_space_values(
    start: int | float,
    end: int | float,
    num_samples: int,
    interval: ClosedInterval,
    eager: bool,
) -> None:
    """Compare `pl.linear_space` against a reference built with `np.linspace`."""
    # Obtain a Series named "ls" through either the expression or the eager path.
    if not eager:
        frame = pl.select(
            ls=pl.linear_space(start, end, num_samples, closed=interval)
        )
        result = frame.to_series()
    else:
        series = pl.linear_space(
            start, end, num_samples, closed=interval, eager=True
        )
        result = series.rename("ls")

    # "left" relies on numpy's `endpoint=False`; for "right" and "none" the
    # excluded endpoints are sampled as extra points and then sliced off.
    if interval == "both":
        reference = np.linspace(start, end, num_samples)
    elif interval == "left":
        reference = np.linspace(start, end, num_samples, endpoint=False)
    elif interval == "right":
        reference = np.linspace(start, end, num_samples + 1)[1:]
    elif interval == "none":
        reference = np.linspace(start, end, num_samples + 2)[1:-1]

    expected = pl.Series("ls", reference)
    assert_series_equal(result, expected)
57
58
59
def test_linear_space_expr() -> None:
    """Accept an expression as either bound of `pl.linear_space`."""
    lf = pl.LazyFrame({"a": [1, 2, 3, 4, 5]})
    n_rows = pl.col("a").len()

    # Expression as the upper bound; the expected column is named "literal".
    ascending = lf.select(pl.linear_space(0, n_rows, 3))
    ascending_expected = lf.select(
        literal=pl.Series([0.0, 2.5, 5.0], dtype=pl.Float64)
    )
    assert_frame_equal(ascending, ascending_expected)

    # Expression as the lower bound; the expected column is named "a".
    descending = lf.select(pl.linear_space(n_rows, 0, 3))
    descending_expected = lf.select(a=pl.Series([5.0, 2.5, 0.0], dtype=pl.Float64))
    assert_frame_equal(descending, descending_expected)
69
70
71
@pytest.mark.parametrize(
    ("dtype_start", "dtype_end", "dtype_expected"),
    [
        (pl.Float32, pl.Float32, pl.Float32),
        (pl.Float32, pl.Float64, pl.Float64),
        (pl.Float64, pl.Float32, pl.Float64),
        (pl.Float64, pl.Float64, pl.Float64),
        (pl.UInt8, pl.UInt32, pl.Float64),
        (pl.Int16, pl.Int128, pl.Float64),
        (pl.Int8, pl.Float64, pl.Float64),
    ],
)
def test_linear_space_numeric_dtype(
    dtype_start: PolarsDataType,
    dtype_end: PolarsDataType,
    dtype_expected: PolarsDataType,
) -> None:
    """Check the output dtype for each parametrized pair of numeric bound dtypes."""
    empty = pl.LazyFrame()
    lower = pl.lit(0, dtype=dtype_start)
    upper = pl.lit(1, dtype=dtype_end)

    result = empty.select(ls=pl.linear_space(lower, upper, 6))

    wanted = pl.Series([0.0, 0.2, 0.4, 0.6, 0.8, 1.0], dtype=dtype_expected)
    expected = empty.select(ls=wanted)
    assert_frame_equal(result, expected)
96
97
98
def test_linear_space_date() -> None:
    """Date bounds produce microsecond Datetime output for every `closed` mode."""
    first_day = date(2025, 1, 1)
    last_day = date(2025, 2, 1)
    # Four evenly spaced instants spanning the closed range.
    grid = [
        datetime(2025, 1, 1),
        datetime(2025, 1, 11, 8),
        datetime(2025, 1, 21, 16),
        datetime(2025, 2, 1),
    ]
    lf = pl.LazyFrame()

    # (num_samples, closed, slice of the grid that should come back)
    cases: list[tuple[int, ClosedInterval, list[datetime]]] = [
        (4, "both", grid),
        (3, "left", grid[:-1]),
        (3, "right", grid[1:]),
        (2, "none", grid[1:-1]),
    ]
    for count, closed, values in cases:
        result = lf.select(
            ls=pl.linear_space(first_day, last_day, count, closed=closed)
        )
        expected = lf.select(ls=pl.Series(values, dtype=pl.Datetime("us")))
        assert_frame_equal(result, expected)
124
125
126
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Datetime("ms", None),
        pl.Datetime("ms", time_zone="Asia/Tokyo"),
        pl.Datetime("us", None),
        pl.Datetime("us", time_zone="Asia/Tokyo"),
        pl.Datetime("ns", time_zone="Asia/Tokyo"),
        pl.Time,
        pl.Duration("ms"),
        pl.Duration("us"),
        pl.Duration("ns"),
    ],
)
def test_linear_space_temporal(dtype: PolarsDataType) -> None:
    """Temporal bounds must match integer bounds cast to the same dtype."""
    # All temporal types except for Date, which is tested above.
    start = 0
    end = 1_000_000_000
    lf = pl.LazyFrame()

    # Reference: compute on plain integers, then cast via Int64 to the dtype.
    from_integers = pl.linear_space(start, end, 11).cast(pl.Int64).cast(dtype)
    result_int = lf.select(ls=from_integers)

    # Under test: the bounds already carry the temporal dtype.
    typed_start = pl.lit(start, dtype=dtype)
    typed_end = pl.lit(end, dtype=dtype)
    result_dt = lf.select(ls=pl.linear_space(typed_start, typed_end, 11))

    assert_frame_equal(result_int, result_dt)
155
156
157
@pytest.mark.parametrize(
    ("dtype1", "dtype2", "str1", "str2"),
    [
        (pl.Date, pl.Datetime("ms"), "Date", "Datetime('ms')"),
        (
            pl.Datetime("ms"),
            pl.Datetime("ns"),
            "Datetime('ms')",
            "Datetime('ns')",
        ),
        (pl.Datetime("us"), pl.Time, "Datetime('μs')", "Time"),
        (
            pl.Duration("us"),
            pl.Duration("ms"),
            "Duration('μs')",
            "Duration('ms')",
        ),
        (pl.Int32, pl.String, "Int32", "String"),
    ],
)
def test_linear_space_incompatible_dtypes(
    dtype1: PolarsDataType,
    dtype2: PolarsDataType,
    str1: str,
    str2: str,
) -> None:
    """Mismatched bound dtypes raise a ComputeError that names both dtypes."""
    lower = pl.lit(0, dtype=dtype1)
    upper = pl.lit(1, dtype=dtype2)
    # The message contains quotes and parentheses, so it is escaped before
    # being used as a regex.
    message = f"'start' and 'end' have incompatible dtypes, got {str1} and {str2}"
    with pytest.raises(ComputeError, match=re.escape(message)):
        pl.linear_space(lower, upper, 11, eager=True)
192
193
194
def test_linear_space_expr_wrong_length() -> None:
    """Adding a 6-element linear space to a 5-row frame raises ShapeError."""
    df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
    # Either of these two messages is accepted.
    accepted_messages = (
        "unable to add a column of length 6 to a DataFrame of height 5",
        "zip node received non-equal length inputs",
    )
    pattern = "|".join(f"({text})" for text in accepted_messages)
    with pytest.raises(ShapeError, match=pattern):
        df.with_columns(pl.linear_space(0, 1, 6))
200
201
202
def test_linear_space_num_samples_expr() -> None:
    """`num_samples` may be an expression such as `pl.len()`."""
    lf = pl.LazyFrame({"a": [1, 2, 3, 4, 5]})

    # Five rows, so `pl.len()` requests five samples.
    spaced = pl.linear_space(0, 1, pl.len(), closed="left")
    result = lf.with_columns(ls=spaced)

    wanted = pl.Series([0, 0.2, 0.4, 0.6, 0.8], dtype=pl.Float64)
    expected = lf.with_columns(ls=wanted)
    assert_frame_equal(result, expected)
207
208
209
def test_linear_space_invalid_num_samples_expr() -> None:
    """A multi-row column used as `num_samples` raises ShapeError on collect."""
    lf = pl.LazyFrame({"x": [1, 2, 3]})
    multi_valued = pl.col("x")
    with pytest.raises(ShapeError):
        lf.select(pl.linear_space(0, 1, multi_valued)).collect()
213
214
215
@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])
def test_linear_spaces_values(interval: ClosedInterval) -> None:
    """
    Check each row of `pl.linear_spaces` against scalar `pl.linear_space`.

    Every group of five rows shares one (start, end) pair and varies the sample
    count; one entry per row group is replaced by a null. A row with a null
    start, end, or sample count is expected to be null in the output.
    """
    starts = [
        None, 0.0, 0.0, 0.0, 0.0,
        0.0, None, 0.0, 0.0, 0.0,
        -1.0, -1.0, None, -1.0, -1.0,
        -2.1, -2.1, -2.1, None, -2.1,
    ]  # fmt: skip

    ends = [
        0.0, None, 0.0, 0.0, 0.0,
        1.0, 1.0, None, 1.0, 1.0,
        0.0, 0.0, 0.0, None, 0.0,
        3.4, 3.4, 3.4, 3.4, None,
    ]  # fmt: skip

    # Same sample counts as the scalar test: 0, 1, 2, 5 and 1_000.
    num_samples = [
        0, 1, None, 5, 1_000,
        0, 1, 2, 5, None,
        0, 1, 2, 5, 1_000,
        0, 1, 2, 5, 1_000,
    ]  # fmt: skip

    df = pl.DataFrame(
        {
            "start": starts,
            "end": ends,
            "num_samples": num_samples,
        }
    )

    out = df.select(pl.linear_spaces("start", "end", "num_samples", closed=interval))[
        "start"
    ]

    # We check each element against the output from pl.linear_space(), which is
    # validated above.
    for row, start, end, ns in zip(out, starts, ends, num_samples):
        if start is None or end is None or ns is None:
            assert row is None
        else:
            expected = pl.linear_space(
                start, end, ns, eager=True, closed=interval
            ).rename("")
            assert_series_equal(row, expected)
260
261
262
@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])
def test_linear_spaces_one_numeric(interval: ClosedInterval) -> None:
    """Replace one of the three column inputs with a Python scalar."""
    # Two expressions, one numeric input
    starts = [1, 2]
    ends = [5, 6]
    num_samples = [3, 4]
    lf = pl.LazyFrame(
        {
            "start": starts,
            "end": ends,
            "num_samples": num_samples,
        }
    )

    # Each output column is named after the argument that was made scalar.
    scalar_start = pl.linear_spaces(
        starts[0], "end", "num_samples", closed=interval
    ).alias("start")
    scalar_end = pl.linear_spaces(
        "start", ends[0], "num_samples", closed=interval
    ).alias("end")
    scalar_count = pl.linear_spaces(
        "start", "end", num_samples[0], closed=interval
    ).alias("num_samples")
    result = lf.select(scalar_start, scalar_end, scalar_count)

    def row_space(lo: int, hi: int, count: int) -> pl.Series:
        # Per-row reference computed with the scalar function.
        return pl.linear_space(lo, hi, count, closed=interval, eager=True)

    expected = pl.LazyFrame(
        {
            "start": [
                row_space(starts[0], hi, count)
                for hi, count in zip(ends, num_samples)
            ],
            "end": [
                row_space(lo, ends[0], count)
                for lo, count in zip(starts, num_samples)
            ],
            "num_samples": [
                row_space(lo, hi, num_samples[0]) for lo, hi in zip(starts, ends)
            ],
        }
    )
    assert_frame_equal(result, expected)
310
311
312
@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])
def test_linear_spaces_two_numeric(interval: ClosedInterval) -> None:
    """Keep one column input and pass the other two as Python scalars."""
    # One expression, two numeric inputs
    starts = [1, 2]
    ends = [5, 6]
    num_samples = [3, 4]
    lf = pl.LazyFrame(
        {
            "start": starts,
            "end": ends,
            "num_samples": num_samples,
        }
    )

    # Each output column is named after the argument that stays a column.
    column_start = pl.linear_spaces(
        "start", ends[0], num_samples[0], closed=interval
    ).alias("start")
    column_end = pl.linear_spaces(
        starts[0], "end", num_samples[0], closed=interval
    ).alias("end")
    column_count = pl.linear_spaces(
        starts[0], ends[0], "num_samples", closed=interval
    ).alias("num_samples")
    result = lf.select(column_start, column_end, column_count)

    def row_space(lo: int, hi: int, count: int) -> pl.Series:
        # Per-row reference computed with the scalar function.
        return pl.linear_space(lo, hi, count, closed=interval, eager=True)

    expected = pl.LazyFrame(
        {
            "start": [row_space(lo, ends[0], num_samples[0]) for lo in starts],
            "end": [row_space(starts[0], hi, num_samples[0]) for hi in ends],
            "num_samples": [
                row_space(starts[0], ends[0], count) for count in num_samples
            ],
        }
    )
    assert_frame_equal(result, expected)
362
363
364
@pytest.mark.parametrize(
    "num_samples",
    [
        5,
        pl.lit(5),
        pl.lit(5, dtype=pl.UInt8),
        pl.lit(5, dtype=pl.UInt16),
        pl.lit(5, dtype=pl.UInt32),
        pl.lit(5, dtype=pl.UInt64),
        pl.lit(5, dtype=pl.Int8),
        pl.lit(5, dtype=pl.Int16),
        pl.lit(5, dtype=pl.Int32),
        pl.lit(5, dtype=pl.Int64),
    ],
)
@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float32,
        pl.Float64,
        pl.Datetime,
    ],
)
def test_linear_spaces_as_array(
    interval: ClosedInterval,
    num_samples: int | Expr,
    dtype: PolarsDataType,
) -> None:
    """`as_array=True` returns a width-5 Array column for a constant sample count."""
    starts = [1, 2]
    ends = [5, 6]
    lf = pl.LazyFrame(
        {
            "start": pl.Series(starts, dtype=dtype),
            "end": pl.Series(ends, dtype=dtype),
        }
    )

    spaces = pl.linear_spaces(
        "start", "end", num_samples, closed=interval, as_array=True
    )
    result = lf.select(a=spaces)

    # One scalar reference per row, built from literals of the tested dtype.
    expected_rows = [
        pl.linear_space(
            pl.lit(lo, dtype=dtype),
            pl.lit(hi, dtype=dtype),
            num_samples,
            closed=interval,
            eager=True,
        )
        for lo, hi in zip(starts, ends)
    ]
    expected = pl.LazyFrame(
        {"a": pl.Series(expected_rows, dtype=pl.Array(dtype, 5))}
    )
    assert_frame_equal(result, expected)
422
423
424
@pytest.mark.parametrize("bad_num_samples", [pl.lit("a"), 1.0, "num_samples"])
def test_linear_space_invalid_as_array(bad_num_samples: Any) -> None:
    """`as_array=True` is rejected unless `num_samples` is a constant integer."""
    lf = pl.LazyFrame(
        {
            "start": [1, 2],
            "end": [5, 6],
            "num_samples": [2, 4],
        }
    )
    with pytest.raises(
        InvalidOperationError,
        match="'as_array' is only valid when 'num_samples' is a constant integer",
    ):
        # Reference the columns that exist in `lf`, so that `num_samples` is
        # the only invalid argument in the call.
        lf.select(pl.linear_spaces("start", "end", bad_num_samples, as_array=True))
438
439
440
@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])
def test_linear_spaces_numeric_input(interval: ClosedInterval) -> None:
    """Mix column inputs with scalar constants that are not taken from the frame."""
    starts = [1, 2]
    ends = [5, 6]
    num_samples = [3, 4]
    lf = pl.LazyFrame(
        {
            "start": starts,
            "end": ends,
            "num_samples": num_samples,
        }
    )

    all_columns = pl.linear_spaces(
        "start", "end", "num_samples", closed=interval
    ).alias("all")
    fixed_start = pl.linear_spaces(0, "end", "num_samples", closed=interval).alias(
        "start"
    )
    fixed_end = pl.linear_spaces("start", 10, "num_samples", closed=interval).alias(
        "end"
    )
    fixed_count = pl.linear_spaces("start", "end", 8, closed=interval).alias(
        "num_samples"
    )
    result = lf.select(all_columns, fixed_start, fixed_end, fixed_count)

    def row_space(lo: int, hi: int, count: int) -> pl.Series:
        # Per-row reference computed with the scalar function.
        return pl.linear_space(lo, hi, count, closed=interval, eager=True)

    rows = list(zip(starts, ends, num_samples))
    expected = pl.LazyFrame(
        {
            "all": [row_space(lo, hi, count) for lo, hi, count in rows],
            "start": [row_space(0, hi, count) for _, hi, count in rows],
            "end": [row_space(lo, 10, count) for lo, _, count in rows],
            "num_samples": [row_space(lo, hi, 8) for lo, hi, _ in rows],
        }
    )
    assert_frame_equal(result, expected)
487
488
489
def test_linear_spaces_date() -> None:
    """Date columns yield lists of microsecond Datetimes; null inputs yield null."""
    d1 = date(2025, 1, 1)
    d2 = date(2025, 2, 1)

    # Rows 0-3 request three samples and rows 4-7 request four; in each group
    # the first three rows have exactly one null input.
    lf = pl.LazyFrame(
        {
            "start": [None, d1, d1, d1, None, d1, d1, d1],
            "end": [d2, None, d2, d2, d2, None, d2, d2],
            "num_samples": [3, 3, None, 3, 4, 4, None, 4],
        }
    )
    result = lf.select(pl.linear_spaces("start", "end", "num_samples"))

    three_samples = [
        datetime(2025, 1, 1),
        datetime(2025, 1, 16, 12),
        datetime(2025, 2, 1),
    ]
    four_samples = [
        datetime(2025, 1, 1),
        datetime(2025, 1, 11, 8),
        datetime(2025, 1, 21, 16),
        datetime(2025, 2, 1),
    ]
    expected_rows = [
        None,
        None,
        None,
        three_samples,
        None,
        None,
        None,
        four_samples,
    ]
    expected = pl.LazyFrame(
        {
            "start": pl.Series(
                expected_rows,
                dtype=pl.List(pl.Datetime(time_unit="us")),
            )
        }
    )
    assert_frame_equal(result, expected)
529
530
531
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Datetime("ms", None),
        pl.Datetime("ms", time_zone="Asia/Tokyo"),
        pl.Datetime("us", None),
        pl.Datetime("us", time_zone="Asia/Tokyo"),
        pl.Datetime("ns", time_zone="Asia/Tokyo"),
        pl.Time,
        pl.Duration("ms"),
        pl.Duration("us"),
        pl.Duration("ns"),
    ],
)
def test_linear_spaces_temporal(dtype: PolarsDataType) -> None:
    """Temporal columns must match integer columns whose result is cast afterwards."""
    # All temporal types except for Date, which is tested above.
    start = 0
    end = 1_000_000_000

    lf = pl.LazyFrame(
        {
            "start": [start, start],
            "end": [end, end],
            "num_samples": [10, 15],
        }
    )
    lf_temporal = lf.select(pl.col("start", "end").cast(dtype), "num_samples")

    # The same expression is applied to the integer and the temporal frame.
    spaces = pl.linear_spaces("start", "end", "num_samples")

    # Reference: integer result, cast to a list of the temporal dtype.
    as_temporal_list = pl.col("start").cast(pl.List(dtype))
    result_int = lf.select(spaces).select(as_temporal_list)
    result_dt = lf_temporal.select(spaces)

    assert_frame_equal(result_int, result_dt)
564
565
566
@pytest.mark.parametrize(
    ("dtype1", "dtype2", "str1", "str2"),
    [
        (pl.Date, pl.Datetime("ms"), "Date", "Datetime('ms')"),
        (
            pl.Datetime("ms"),
            pl.Datetime("ns"),
            "Datetime('ms')",
            "Datetime('ns')",
        ),
        (pl.Datetime("us"), pl.Time, "Datetime('μs')", "Time"),
        (
            pl.Duration("us"),
            pl.Duration("ms"),
            "Duration('μs')",
            "Duration('ms')",
        ),
        (pl.Int32, pl.String, "Int32", "String"),
    ],
)
def test_linear_spaces_incompatible_dtypes(
    dtype1: PolarsDataType,
    dtype2: PolarsDataType,
    str1: str,
    str2: str,
) -> None:
    """Mismatched column dtypes raise a ComputeError that names both dtypes."""
    lf = pl.LazyFrame(
        {
            "start": pl.Series([0]).cast(dtype1),
            "end": pl.Series([1]).cast(dtype2),
            "num_samples": 3,
        }
    )
    # The message contains quotes and parentheses, so it is escaped before
    # being used as a regex.
    message = f"'start' and 'end' have incompatible dtypes, got {str1} and {str2}"
    with pytest.raises(ComputeError, match=re.escape(message)):
        lf.select(pl.linear_spaces("start", "end", "num_samples")).collect()
606
607
608
def test_linear_spaces_f32() -> None:
    """Float32 bounds give a List(Float32) result."""
    lows = pl.Series([0.0, 1.0], dtype=pl.Float32)
    highs = pl.Series([10.0, 11.0], dtype=pl.Float32)
    lf = pl.LazyFrame({"start": lows, "end": highs})

    result = lf.select(pl.linear_spaces("start", "end", 6))

    wanted = pl.Series(
        [
            [0.0, 2.0, 4.0, 6.0, 8.0, 10.0],
            [1.0, 3.0, 5.0, 7.0, 9.0, 11.0],
        ],
        dtype=pl.List(pl.Float32),
    )
    expected = pl.LazyFrame({"start": wanted})
    assert_frame_equal(result, expected)
628
629
630
def test_linear_spaces_eager() -> None:
    """With `eager=True` a Series start gives a Series result named after it."""
    lower_bounds = pl.Series("s", [1, 2])
    expected = pl.Series("s", [[1.0, 3.5, 6.0], [2.0, 4.0, 6.0]])

    result = pl.linear_spaces(lower_bounds, 6, 3, eager=True)

    assert_series_equal(result, expected)
636
637