Path: blob/main/py-polars/tests/unit/functions/range/test_int_range.py
6939 views
"""Unit tests for `pl.int_range`, `pl.int_ranges` and the `pl.arange` alias."""

from __future__ import annotations

from typing import Any

import pytest

import polars as pl
from polars.exceptions import (
    ComputeError,
    InvalidOperationError,
    SchemaError,
    ShapeError,
)
from polars.testing import assert_frame_equal, assert_series_equal


def test_int_range() -> None:
    """`int_range(0, 3)` used as an expression yields 0, 1, 2."""
    result = pl.int_range(0, 3)
    expected = pl.Series("int_range", [0, 1, 2])
    assert_series_equal(pl.select(int_range=result).to_series(), expected)


def test_int_range_alias() -> None:
    """`arange` can be used inside a lazy filter predicate."""
    # note: `arange` is an alias for `int_range`
    ldf = pl.LazyFrame({"a": [1, 1, 1]})
    result = ldf.filter(pl.col("a") >= pl.arange(0, 3)).collect()
    expected = pl.DataFrame({"a": [1, 1]})
    assert_frame_equal(result, expected)


def test_int_range_decreasing() -> None:
    """Negative steps match the output of Python's built-in `range`."""
    assert pl.int_range(10, 1, -2, eager=True).to_list() == list(range(10, 1, -2))
    assert pl.int_range(10, -1, -1, eager=True).to_list() == list(range(10, -1, -1))


def test_int_range_expr() -> None:
    """The end bound may be an expression; eager `arange` honours the step."""
    df = pl.DataFrame({"a": ["foobar", "barfoo"]})
    out = df.select(pl.int_range(0, pl.col("a").count() * 10))
    assert out.shape == (20, 1)
    assert out.to_series(0)[-1] == 19

    # eager arange
    out2 = pl.arange(0, 10, 2, eager=True)
    assert out2.to_list() == [0, 2, 4, 6, 8]


def test_int_range_short_syntax() -> None:
    """A single positional argument is treated as the end bound."""
    result = pl.int_range(3)
    expected = pl.Series("int", [0, 1, 2])
    assert_series_equal(pl.select(int=result).to_series(), expected)


def test_int_ranges_short_syntax() -> None:
    """A single positional argument to `int_ranges` is treated as the end bound."""
    result = pl.int_ranges(3)
    expected = pl.Series("int", [[0, 1, 2]])
    assert_series_equal(pl.select(int=result).to_series(), expected)


def test_int_range_start_default() -> None:
    """Passing only `end` by keyword starts the range at 0."""
    result = pl.int_range(end=3)
    expected = pl.Series("int", [0, 1, 2])
    assert_series_equal(pl.select(int=result).to_series(), expected)


def test_int_ranges_start_default() -> None:
    """Passing only `end` as a column name starts every sub-range at 0."""
    df = pl.DataFrame({"end": [3, 2]})
    result = df.select(int_range=pl.int_ranges(end="end"))
    expected = pl.DataFrame({"int_range": [[0, 1, 2], [0, 1]]})
    assert_frame_equal(result, expected)


def test_int_range_eager() -> None:
    """`eager=True` returns a Series named "literal"."""
    result = pl.int_range(0, 3, eager=True)
    expected = pl.Series("literal", [0, 1, 2])
    assert_series_equal(result, expected)


def test_int_range_lazy() -> None:
    """`int_range` can be selected with `eager=False` to build a LazyFrame."""
    lf = pl.select(n=pl.int_range(8, 0, -2), eager=False)
    expected = pl.LazyFrame({"n": [8, 6, 4, 2]})
    assert_frame_equal(lf, expected)


def test_int_range_schema() -> None:
    """The lazy schema and the collected schema both report Int64."""
    result = pl.LazyFrame().select(int=pl.int_range(-3, 3))

    expected_schema = {"int": pl.Int64}
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema


@pytest.mark.parametrize(
    ("start", "end", "expected"),
    [
        ("a", "b", pl.Series("a", [[1, 2], [2, 3]])),
        (-1, "a", pl.Series("literal", [[-1, 0], [-1, 0, 1]])),
        ("b", 4, pl.Series("b", [[3], []])),
    ],
)
def test_int_ranges(start: Any, end: Any, expected: pl.Series) -> None:
    """`int_ranges` accepts column names and integer literals as bounds."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})

    result = df.select(pl.int_ranges(start, end))
    assert_series_equal(result.to_series(), expected)


def test_int_ranges_decreasing() -> None:
    """A negative step gives a descending list, eagerly and via `select`."""
    expected = pl.Series("literal", [[5, 4, 3, 2, 1]], dtype=pl.List(pl.Int64))
    assert_series_equal(pl.int_ranges(5, 0, -1, eager=True), expected)
    assert_series_equal(pl.select(pl.int_ranges(5, 0, -1)).to_series(), expected)


@pytest.mark.parametrize(
    ("start", "end", "step"),
    [
        (0, -5, 1),
        (5, 0, 1),
        (0, 5, -1),
    ],
)
def test_int_ranges_empty(start: int, end: int, step: int) -> None:
    """Bounds that the step cannot reach give empty output for both functions."""
    assert_series_equal(
        pl.int_range(start, end, step, eager=True),
        pl.Series("literal", [], dtype=pl.Int64),
    )
    assert_series_equal(
        pl.int_ranges(start, end, step, eager=True),
        pl.Series("literal", [[]], dtype=pl.List(pl.Int64)),
    )
    assert_series_equal(
        pl.Series("int", [], dtype=pl.Int64),
        pl.select(int=pl.int_range(start, end, step)).to_series(),
    )
    assert_series_equal(
        pl.Series("int_range", [[]], dtype=pl.List(pl.Int64)),
        pl.select(int_range=pl.int_ranges(start, end, step)).to_series(),
    )


def test_int_ranges_eager() -> None:
    """Eager `int_ranges` with a Series start keeps that Series' name."""
    start = pl.Series("s", [1, 2])
    result = pl.int_ranges(start, 4, eager=True)

    expected = pl.Series("s", [[1, 2, 3], [2, 3]])
    assert_series_equal(result, expected)


def test_int_ranges_schema_dtype_default() -> None:
    """Without `dtype`, the lazy and collected schemas are List(Int64)."""
    lf = pl.LazyFrame({"start": [1, 2], "end": [3, 4]})

    result = lf.select(pl.int_ranges("start", "end"))

    expected_schema = {"start": pl.List(pl.Int64)}
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema


def test_int_ranges_schema_dtype_arg() -> None:
    """An explicit `dtype` shows up as the inner type in both schemas."""
    lf = pl.LazyFrame({"start": [1, 2], "end": [3, 4]})

    result = lf.select(pl.int_ranges("start", "end", dtype=pl.UInt16))

    expected_schema = {"start": pl.List(pl.UInt16)}
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema


def test_int_range_input_shape_empty() -> None:
    """An empty Series as either bound raises `ShapeError`."""
    empty = pl.Series(dtype=pl.Time)
    single = pl.Series([5])

    with pytest.raises(ShapeError):
        pl.int_range(empty, single, eager=True)
    with pytest.raises(ShapeError):
        pl.int_range(single, empty, eager=True)
    with pytest.raises(ShapeError):
        pl.int_range(empty, empty, eager=True)


def test_int_range_input_shape_multiple_values() -> None:
    """A multi-value Series as either bound raises `ShapeError`."""
    single = pl.Series([5])
    multiple = pl.Series([10, 15])

    with pytest.raises(ShapeError):
        pl.int_range(multiple, single, eager=True)
    with pytest.raises(ShapeError):
        pl.int_range(single, multiple, eager=True)
    with pytest.raises(ShapeError):
        pl.int_range(multiple, multiple, eager=True)


# https://github.com/pola-rs/polars/issues/10867
def test_int_range_index_type_negative() -> None:
    """A UInt32 start combined with a negative end and step counts down to 0."""
    result = pl.select(pl.int_range(pl.lit(3).cast(pl.UInt32).alias("start"), -1, -1))
    expected = pl.DataFrame({"start": [3, 2, 1, 0]})
    assert_frame_equal(result, expected)


def test_int_range_null_input() -> None:
    """A null end bound raises `ComputeError`."""
    with pytest.raises(ComputeError, match="invalid null input for `int_range`"):
        pl.select(pl.int_range(3, pl.lit(None), -1, dtype=pl.UInt32))


def test_int_range_invalid_conversion() -> None:
    """A negative end bound with `dtype=UInt32` raises `InvalidOperationError`."""
    with pytest.raises(
        InvalidOperationError, match="conversion from `i128` to `u32` failed"
    ):
        pl.select(pl.int_range(3, -1, -1, dtype=pl.UInt32))


def test_int_range_non_integer_dtype() -> None:
    """A Float64 `dtype` is rejected with `SchemaError`."""
    with pytest.raises(
        SchemaError, match="non-integer `dtype` passed to `int_range`: 'f64'"
    ):
        pl.select(pl.int_range(3, -1, -1, dtype=pl.Float64))  # type: ignore[arg-type]


def test_int_ranges_broadcasting() -> None:
    """Literal arguments are broadcast against column and Series arguments."""
    df = pl.DataFrame({"int": [1, 2, 3]})
    result = df.select(
        # each alias names the argument(s) passed as a literal, i.e. the
        # one(s) that have to be broadcast to the length of the others
        pl.int_ranges(1, pl.Series([2, 4, 6]), "int").alias("start"),
        pl.int_ranges("int", 6, "int").alias("end"),
        pl.int_ranges("int", pl.col("int") + 2, 1).alias("step"),
        pl.int_ranges("int", 3, 1).alias("end_step"),
        pl.int_ranges(1, "int", 1).alias("start_step"),
        pl.int_ranges(1, 6, "int").alias("start_end"),
        pl.int_ranges("int", pl.Series([4, 5, 10]), "int").alias("no_broadcast"),
    )
    expected = pl.DataFrame(
        {
            "start": [[1], [1, 3], [1, 4]],
            "end": [
                [1, 2, 3, 4, 5],
                [2, 4],
                [3],
            ],
            "step": [[1, 2], [2, 3], [3, 4]],
            "end_step": [
                [1, 2],
                [2],
                [],
            ],
            "start_step": [
                [],
                [1],
                [1, 2],
            ],
            "start_end": [
                [1, 2, 3, 4, 5],
                [1, 3, 5],
                [1, 4],
            ],
            "no_broadcast": [[1, 2, 3], [2, 4], [3, 6, 9]],
        }
    )
    assert_frame_equal(result, expected)


# https://github.com/pola-rs/polars/issues/15307
def test_int_range_non_int_dtype() -> None:
    """A String `dtype` passed to `int_range` is rejected with `SchemaError`."""
    with pytest.raises(
        SchemaError, match="non-integer `dtype` passed to `int_range`: 'str'"
    ):
        pl.int_range(0, 3, dtype=pl.String, eager=True)  # type: ignore[arg-type]


# https://github.com/pola-rs/polars/issues/15307
def test_int_ranges_non_int_dtype() -> None:
    """A String `dtype` passed to `int_ranges` is rejected with `SchemaError`."""
    with pytest.raises(
        SchemaError, match="non-integer `dtype` passed to `int_ranges`: 'str'"
    ):
        pl.int_ranges(0, 3, dtype=pl.String, eager=True)  # type: ignore[arg-type]


# https://github.com/pola-rs/polars/issues/22640
def test_int_ranges_non_numeric_input_should_error() -> None:
    """String columns used as bounds raise `InvalidOperationError`."""
    df = pl.DataFrame(
        {
            "start": ["a", "b"],
            "end": ["c", "d"],
        }
    )

    # Same `pytest.raises(..., match=...)` form as the other error tests in
    # this module; the message contains no regex metacharacters.
    with pytest.raises(
        InvalidOperationError, match="conversion from `str` to `i64` failed"
    ):
        df.select(pl.int_ranges("start", "end"))


def test_int_range_len_count() -> None:
    """`len` and `count` expressions can be used as the end bound."""
    values = [1, 2, None, 4, 5, 6]

    lf = pl.Series("a", values).to_frame().lazy()

    def irange(e: pl.Expr) -> pl.LazyFrame:
        return lf.select(r=pl.int_range(0, e, dtype=pl.get_index_type()))

    # frame length: all six rows, including the null
    q = irange(pl.len())
    assert_series_equal(
        q.collect().to_series(),
        pl.Series("r", [0, 1, 2, 3, 4, 5], pl.get_index_type()),
    )

    # column length: all six rows, including the null
    q = irange(pl.col.a.len())
    assert_series_equal(
        q.collect().to_series(),
        pl.Series("r", [0, 1, 2, 3, 4, 5], pl.get_index_type()),
    )

    # length after dropping the value 4: five rows remain
    q = irange(pl.col.a.filter(pl.col.a.ne_missing(4)).len())
    assert_series_equal(
        q.collect().to_series(),
        pl.Series("r", [0, 1, 2, 3, 4], pl.get_index_type()),
    )

    # count: the five non-null values
    q = irange(pl.col.a.count())
    assert_series_equal(
        q.collect().to_series(),
        pl.Series("r", [0, 1, 2, 3, 4], pl.get_index_type()),
    )

    # count after dropping the value 4: four non-null values remain
    q = irange(pl.col.a.filter(pl.col.a.ne_missing(4)).count())
    assert_series_equal(
        q.collect().to_series(),
        pl.Series("r", [0, 1, 2, 3], pl.get_index_type()),
    )