# Path: blob/main/py-polars/tests/unit/functions/range/test_linear_space.py
# 6939 views
from __future__ import annotations12import re3from datetime import date, datetime4from typing import TYPE_CHECKING, Any56import numpy as np7import pytest89import polars as pl10from polars.exceptions import ComputeError, InvalidOperationError, ShapeError11from polars.testing import assert_frame_equal, assert_series_equal1213if TYPE_CHECKING:14from polars import Expr15from polars._typing import ClosedInterval, PolarsDataType161718@pytest.mark.parametrize(19("start", "end"),20[21(0, 0),22(0, 1),23(-1, 0),24(-2.1, 3.4),25],26)27@pytest.mark.parametrize("num_samples", [0, 1, 2, 5, 1_000])28@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])29@pytest.mark.parametrize("eager", [True, False])30def test_linear_space_values(31start: int | float,32end: int | float,33num_samples: int,34interval: ClosedInterval,35eager: bool,36) -> None:37if eager:38result = pl.linear_space(39start, end, num_samples, closed=interval, eager=True40).rename("ls")41else:42result = pl.select(43ls=pl.linear_space(start, end, num_samples, closed=interval)44).to_series()4546if interval == "both":47expected = pl.Series("ls", np.linspace(start, end, num_samples))48elif interval == "left":49expected = pl.Series("ls", np.linspace(start, end, num_samples, endpoint=False))50elif interval == "right":51expected = pl.Series("ls", np.linspace(start, end, num_samples + 1)[1:])52elif interval == "none":53expected = pl.Series("ls", np.linspace(start, end, num_samples + 2)[1:-1])5455assert_series_equal(result, expected)565758def test_linear_space_expr() -> None:59lf = pl.LazyFrame({"a": [1, 2, 3, 4, 5]})6061result = lf.select(pl.linear_space(0, pl.col("a").len(), 3))62expected = lf.select(literal=pl.Series([0.0, 2.5, 5.0], dtype=pl.Float64))63assert_frame_equal(result, expected)6465result = lf.select(pl.linear_space(pl.col("a").len(), 0, 3))66expected = lf.select(a=pl.Series([5.0, 2.5, 0.0], dtype=pl.Float64))67assert_frame_equal(result, expected)686970@pytest.mark.parametrize(71("dtype_start", 
"dtype_end", "dtype_expected"),72[73(pl.Float32, pl.Float32, pl.Float32),74(pl.Float32, pl.Float64, pl.Float64),75(pl.Float64, pl.Float32, pl.Float64),76(pl.Float64, pl.Float64, pl.Float64),77(pl.UInt8, pl.UInt32, pl.Float64),78(pl.Int16, pl.Int128, pl.Float64),79(pl.Int8, pl.Float64, pl.Float64),80],81)82def test_linear_space_numeric_dtype(83dtype_start: PolarsDataType,84dtype_end: PolarsDataType,85dtype_expected: PolarsDataType,86) -> None:87lf = pl.LazyFrame()88result = lf.select(89ls=pl.linear_space(pl.lit(0, dtype=dtype_start), pl.lit(1, dtype=dtype_end), 6)90)91expected = lf.select(92ls=pl.Series([0.0, 0.2, 0.4, 0.6, 0.8, 1.0], dtype=dtype_expected)93)94assert_frame_equal(result, expected)959697def test_linear_space_date() -> None:98d1 = date(2025, 1, 1)99d2 = date(2025, 2, 1)100out_values = [101datetime(2025, 1, 1),102datetime(2025, 1, 11, 8),103datetime(2025, 1, 21, 16),104datetime(2025, 2, 1),105]106lf = pl.LazyFrame()107108result = lf.select(ls=pl.linear_space(d1, d2, 4, closed="both"))109expected = lf.select(ls=pl.Series(out_values, dtype=pl.Datetime("us")))110assert_frame_equal(result, expected)111112result = lf.select(ls=pl.linear_space(d1, d2, 3, closed="left"))113expected = lf.select(ls=pl.Series(out_values[:-1], dtype=pl.Datetime("us")))114assert_frame_equal(result, expected)115116result = lf.select(ls=pl.linear_space(d1, d2, 3, closed="right"))117expected = lf.select(ls=pl.Series(out_values[1:], dtype=pl.Datetime("us")))118assert_frame_equal(result, expected)119120result = lf.select(ls=pl.linear_space(d1, d2, 2, closed="none"))121expected = lf.select(ls=pl.Series(out_values[1:-1], dtype=pl.Datetime("us")))122assert_frame_equal(result, expected)123124125@pytest.mark.parametrize(126"dtype",127[128pl.Datetime("ms", None),129pl.Datetime("ms", time_zone="Asia/Tokyo"),130pl.Datetime("us", None),131pl.Datetime("us", time_zone="Asia/Tokyo"),132pl.Datetime("ns", 
time_zone="Asia/Tokyo"),133pl.Time,134pl.Duration("ms"),135pl.Duration("us"),136pl.Duration("ns"),137],138)139def test_linear_space_temporal(dtype: PolarsDataType) -> None:140# All temporal types except for Date, which is tested above.141start = 0142end = 1_000_000_000143144lf = pl.LazyFrame()145146result_int = lf.select(147ls=pl.linear_space(start, end, 11).cast(pl.Int64).cast(dtype)148)149result_dt = lf.select(150ls=pl.linear_space(pl.lit(start, dtype=dtype), pl.lit(end, dtype=dtype), 11)151)152153assert_frame_equal(result_int, result_dt)154155156@pytest.mark.parametrize(157("dtype1", "dtype2", "str1", "str2"),158[159(pl.Date, pl.Datetime("ms"), "Date", "Datetime('ms')"),160(161pl.Datetime("ms"),162pl.Datetime("ns"),163"Datetime('ms')",164"Datetime('ns')",165),166(pl.Datetime("us"), pl.Time, "Datetime('μs')", "Time"),167(168pl.Duration("us"),169pl.Duration("ms"),170"Duration('μs')",171"Duration('ms')",172),173(pl.Int32, pl.String, "Int32", "String"),174],175)176def test_linear_space_incompatible_dtypes(177dtype1: PolarsDataType,178dtype2: PolarsDataType,179str1: str,180str2: str,181) -> None:182value1 = pl.lit(0, dtype1)183value2 = pl.lit(1, dtype2)184with pytest.raises(185ComputeError,186match=re.escape(187f"'start' and 'end' have incompatible dtypes, got {str1} and {str2}"188),189):190pl.linear_space(value1, value2, 11, eager=True)191192193def test_linear_space_expr_wrong_length() -> None:194df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})195msg = "unable to add a column of length 6 to a DataFrame of height 5"196streaming_msg = "zip node received non-equal length inputs"197with pytest.raises(ShapeError, match=rf"({msg})|({streaming_msg})"):198df.with_columns(pl.linear_space(0, 1, 6))199200201def test_linear_space_num_samples_expr() -> None:202lf = pl.LazyFrame({"a": [1, 2, 3, 4, 5]})203result = lf.with_columns(ls=pl.linear_space(0, 1, pl.len(), closed="left"))204expected = lf.with_columns(ls=pl.Series([0, 0.2, 0.4, 0.6, 0.8], 
dtype=pl.Float64))205assert_frame_equal(result, expected)206207208def test_linear_space_invalid_num_samples_expr() -> None:209lf = pl.LazyFrame({"x": [1, 2, 3]})210with pytest.raises(ShapeError):211lf.select(pl.linear_space(0, 1, pl.col("x"))).collect()212213214@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])215def test_linear_spaces_values(interval: ClosedInterval) -> None:216starts = [217None, 0.0, 0.0, 0.0, 0.0,2180.0, None, 0.0, 0.0, 0.0,219-1.0, -1.0, None, -1.0, -1.0,220-2.1, -2.1, -2.1, None, -2.1,221] # fmt: skip222223ends = [2240.0, None, 0.0, 0.0, 0.0,2251.0, 1.0, None, 1.0, 1.0,2260.0, 0.0, 0.0, None, 0.0,2273.4, 3.4, 3.4, 3.4, None,228] # fmt: skip229230num_samples = [2310, 1, None, 5, 1_1000,2320, 1, 2, 5, None,2330, 1, 2, 5, 1_1000,2340, 1, 2, 5, 1_1000,235] # fmt: skip236237df = pl.DataFrame(238{239"start": starts,240"end": ends,241"num_samples": num_samples,242}243)244245out = df.select(pl.linear_spaces("start", "end", "num_samples", closed=interval))[246"start"247]248249# We check each element against the output from pl.linear_space(), which is250# validated above.251for row, start, end, ns in zip(out, starts, ends, num_samples):252if start is None or end is None or ns is None:253assert row is None254else:255expected = pl.linear_space(256start, end, ns, eager=True, closed=interval257).rename("")258assert_series_equal(row, expected)259260261@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])262def test_linear_spaces_one_numeric(interval: ClosedInterval) -> None:263# Two expressions, one numeric input264starts = [1, 2]265ends = [5, 6]266num_samples = [3, 4]267lf = pl.LazyFrame(268{269"start": starts,270"end": ends,271"num_samples": num_samples,272}273)274result = lf.select(275pl.linear_spaces(starts[0], "end", "num_samples", closed=interval).alias(276"start"277),278pl.linear_spaces("start", ends[0], "num_samples", closed=interval).alias("end"),279pl.linear_spaces("start", "end", num_samples[0], 
closed=interval).alias(280"num_samples"281),282)283expected_start0 = pl.linear_space(284starts[0], ends[0], num_samples[0], closed=interval, eager=True285)286expected_start1 = pl.linear_space(287starts[0], ends[1], num_samples[1], closed=interval, eager=True288)289expected_end0 = pl.linear_space(290starts[0], ends[0], num_samples[0], closed=interval, eager=True291)292expected_end1 = pl.linear_space(293starts[1], ends[0], num_samples[1], closed=interval, eager=True294)295expected_ns0 = pl.linear_space(296starts[0], ends[0], num_samples[0], closed=interval, eager=True297)298expected_ns1 = pl.linear_space(299starts[1], ends[1], num_samples[0], closed=interval, eager=True300)301expected = pl.LazyFrame(302{303"start": [expected_start0, expected_start1],304"end": [expected_end0, expected_end1],305"num_samples": [expected_ns0, expected_ns1],306}307)308assert_frame_equal(result, expected)309310311@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])312def test_linear_spaces_two_numeric(interval: ClosedInterval) -> None:313# One expression, two numeric inputs314starts = [1, 2]315ends = [5, 6]316num_samples = [3, 4]317lf = pl.LazyFrame(318{319"start": starts,320"end": ends,321"num_samples": num_samples,322}323)324result = lf.select(325pl.linear_spaces("start", ends[0], num_samples[0], closed=interval).alias(326"start"327),328pl.linear_spaces(starts[0], "end", num_samples[0], closed=interval).alias(329"end"330),331pl.linear_spaces(starts[0], ends[0], "num_samples", closed=interval).alias(332"num_samples"333),334)335expected_start0 = pl.linear_space(336starts[0], ends[0], num_samples[0], closed=interval, eager=True337)338expected_start1 = pl.linear_space(339starts[1], ends[0], num_samples[0], closed=interval, eager=True340)341expected_end0 = pl.linear_space(342starts[0], ends[0], num_samples[0], closed=interval, eager=True343)344expected_end1 = pl.linear_space(345starts[0], ends[1], num_samples[0], closed=interval, eager=True346)347expected_ns0 = 
pl.linear_space(348starts[0], ends[0], num_samples[0], closed=interval, eager=True349)350expected_ns1 = pl.linear_space(351starts[0], ends[0], num_samples[1], closed=interval, eager=True352)353expected = pl.LazyFrame(354{355"start": [expected_start0, expected_start1],356"end": [expected_end0, expected_end1],357"num_samples": [expected_ns0, expected_ns1],358}359)360assert_frame_equal(result, expected)361362363@pytest.mark.parametrize(364"num_samples",365[3665,367pl.lit(5),368pl.lit(5, dtype=pl.UInt8),369pl.lit(5, dtype=pl.UInt16),370pl.lit(5, dtype=pl.UInt32),371pl.lit(5, dtype=pl.UInt64),372pl.lit(5, dtype=pl.Int8),373pl.lit(5, dtype=pl.Int16),374pl.lit(5, dtype=pl.Int32),375pl.lit(5, dtype=pl.Int64),376],377)378@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])379@pytest.mark.parametrize(380"dtype",381[382pl.Float32,383pl.Float64,384pl.Datetime,385],386)387def test_linear_spaces_as_array(388interval: ClosedInterval,389num_samples: int | Expr,390dtype: PolarsDataType,391) -> None:392starts = [1, 2]393ends = [5, 6]394lf = pl.LazyFrame(395{396"start": pl.Series(starts, dtype=dtype),397"end": pl.Series(ends, dtype=dtype),398}399)400result = lf.select(401a=pl.linear_spaces("start", "end", num_samples, closed=interval, as_array=True)402)403expected_0 = pl.linear_space(404pl.lit(starts[0], dtype=dtype),405pl.lit(ends[0], dtype=dtype),406num_samples,407closed=interval,408eager=True,409)410expected_1 = pl.linear_space(411pl.lit(starts[1], dtype=dtype),412pl.lit(ends[1], dtype=dtype),413num_samples,414closed=interval,415eager=True,416)417expected = pl.LazyFrame(418{"a": pl.Series([expected_0, expected_1], dtype=pl.Array(dtype, 5))}419)420assert_frame_equal(result, expected)421422423@pytest.mark.parametrize("bad_num_samples", [pl.lit("a"), 1.0, "num_samples"])424def test_linear_space_invalid_as_array(bad_num_samples: Any) -> None:425lf = pl.LazyFrame(426{427"start": [1, 2],428"end": [5, 6],429"num_samples": [2, 4],430}431)432with 
pytest.raises(433InvalidOperationError,434match="'as_array' is only valid when 'num_samples' is a constant integer",435):436lf.select(pl.linear_spaces("starts", "ends", bad_num_samples, as_array=True))437438439@pytest.mark.parametrize("interval", ["both", "left", "right", "none"])440def test_linear_spaces_numeric_input(interval: ClosedInterval) -> None:441starts = [1, 2]442ends = [5, 6]443num_samples = [3, 4]444lf = pl.LazyFrame(445{446"start": starts,447"end": ends,448"num_samples": num_samples,449}450)451result = lf.select(452pl.linear_spaces("start", "end", "num_samples", closed=interval).alias("all"),453pl.linear_spaces(0, "end", "num_samples", closed=interval).alias("start"),454pl.linear_spaces("start", 10, "num_samples", closed=interval).alias("end"),455pl.linear_spaces("start", "end", 8, closed=interval).alias("num_samples"),456)457expected_all0 = pl.linear_space(458starts[0], ends[0], num_samples[0], closed=interval, eager=True459)460expected_all1 = pl.linear_space(461starts[1], ends[1], num_samples[1], closed=interval, eager=True462)463expected_start0 = pl.linear_space(4640, ends[0], num_samples[0], closed=interval, eager=True465)466expected_start1 = pl.linear_space(4670, ends[1], num_samples[1], closed=interval, eager=True468)469expected_end0 = pl.linear_space(470starts[0], 10, num_samples[0], closed=interval, eager=True471)472expected_end1 = pl.linear_space(473starts[1], 10, num_samples[1], closed=interval, eager=True474)475expected_ns0 = pl.linear_space(starts[0], ends[0], 8, closed=interval, eager=True)476expected_ns1 = pl.linear_space(starts[1], ends[1], 8, closed=interval, eager=True)477expected = pl.LazyFrame(478{479"all": [expected_all0, expected_all1],480"start": [expected_start0, expected_start1],481"end": [expected_end0, expected_end1],482"num_samples": [expected_ns0, expected_ns1],483}484)485assert_frame_equal(result, expected)486487488def test_linear_spaces_date() -> None:489d1 = date(2025, 1, 1)490d2 = date(2025, 2, 1)491492lf = 
pl.LazyFrame(493{494"start": [None, d1, d1, d1, None, d1, d1, d1],495"end": [d2, None, d2, d2, d2, None, d2, d2],496"num_samples": [3, 3, None, 3, 4, 4, None, 4],497}498)499500result = lf.select(pl.linear_spaces("start", "end", "num_samples"))501expected = pl.LazyFrame(502{503"start": pl.Series(504[505None,506None,507None,508[509datetime(2025, 1, 1),510datetime(2025, 1, 16, 12),511datetime(2025, 2, 1),512],513None,514None,515None,516[517datetime(2025, 1, 1),518datetime(2025, 1, 11, 8),519datetime(2025, 1, 21, 16),520datetime(2025, 2, 1),521],522],523dtype=pl.List(pl.Datetime(time_unit="us")),524)525}526)527assert_frame_equal(result, expected)528529530@pytest.mark.parametrize(531"dtype",532[533pl.Datetime("ms", None),534pl.Datetime("ms", time_zone="Asia/Tokyo"),535pl.Datetime("us", None),536pl.Datetime("us", time_zone="Asia/Tokyo"),537pl.Datetime("ns", time_zone="Asia/Tokyo"),538pl.Time,539pl.Duration("ms"),540pl.Duration("us"),541pl.Duration("ns"),542],543)544def test_linear_spaces_temporal(dtype: PolarsDataType) -> None:545# All temporal types except for Date, which is tested above.546start = 0547end = 1_000_000_000548549lf = pl.LazyFrame(550{551"start": [start, start],552"end": [end, end],553"num_samples": [10, 15],554}555)556lf_temporal = lf.select(pl.col("start", "end").cast(dtype), "num_samples")557result_int = lf.select(pl.linear_spaces("start", "end", "num_samples")).select(558pl.col("start").cast(pl.List(dtype))559)560result_dt = lf_temporal.select(pl.linear_spaces("start", "end", "num_samples"))561562assert_frame_equal(result_int, result_dt)563564565@pytest.mark.parametrize(566("dtype1", "dtype2", "str1", "str2"),567[568(pl.Date, pl.Datetime("ms"), "Date", "Datetime('ms')"),569(570pl.Datetime("ms"),571pl.Datetime("ns"),572"Datetime('ms')",573"Datetime('ns')",574),575(pl.Datetime("us"), pl.Time, "Datetime('μs')", "Time"),576(577pl.Duration("us"),578pl.Duration("ms"),579"Duration('μs')",580"Duration('ms')",581),582(pl.Int32, pl.String, "Int32", 
"String"),583],584)585def test_linear_spaces_incompatible_dtypes(586dtype1: PolarsDataType,587dtype2: PolarsDataType,588str1: str,589str2: str,590) -> None:591df = pl.LazyFrame(592{593"start": pl.Series([0]).cast(dtype1),594"end": pl.Series([1]).cast(dtype2),595"num_samples": 3,596}597)598with pytest.raises(599ComputeError,600match=re.escape(601f"'start' and 'end' have incompatible dtypes, got {str1} and {str2}"602),603):604df.select(pl.linear_spaces("start", "end", "num_samples")).collect()605606607def test_linear_spaces_f32() -> None:608df = pl.LazyFrame(609{610"start": pl.Series([0.0, 1.0], dtype=pl.Float32),611"end": pl.Series([10.0, 11.0], dtype=pl.Float32),612}613)614result = df.select(pl.linear_spaces("start", "end", 6))615expected = pl.LazyFrame(616{617"start": pl.Series(618[619[0.0, 2.0, 4.0, 6.0, 8.0, 10.0],620[1.0, 3.0, 5.0, 7.0, 9.0, 11.0],621],622dtype=pl.List(pl.Float32),623)624}625)626assert_frame_equal(result, expected)627628629def test_linear_spaces_eager() -> None:630start = pl.Series("s", [1, 2])631result = pl.linear_spaces(start, 6, 3, eager=True)632633expected = pl.Series("s", [[1.0, 3.5, 6.0], [2.0, 4.0, 6.0]])634assert_series_equal(result, expected)635636637