# Path: blob/main/py-polars/tests/unit/operations/namespaces/string/test_pad.py
# 6940 views
from __future__ import annotations

import pytest

import polars as pl
from polars.exceptions import ShapeError
from polars.testing import assert_frame_equal


def test_str_pad_start() -> None:
    """Check `str.pad_start(10)` output and its byte length per row."""
    df = pl.DataFrame({"a": ["foo", "longer_foo", "longest_fooooooo", "hi"]})

    result = df.select(
        pl.col("a").str.pad_start(10).alias("padded"),
        pl.col("a").str.pad_start(10).str.len_bytes().alias("padded_len"),
    )

    expected = pl.DataFrame(
        {
            # Short values gain leading spaces up to width 10 (7 for "foo",
            # 8 for "hi"); values already >= 10 long are left untouched.
            "padded": ["       foo", "longer_foo", "longest_fooooooo", "        hi"],
            "padded_len": [10, 10, 16, 10],
        },
        schema_overrides={"padded_len": pl.UInt32},
    )
    assert_frame_equal(result, expected)


def test_str_pad_start_expr() -> None:
    """Check `str.pad_start` with a literal expr, an int, and a column name."""
    df = pl.DataFrame({"a": ["a", "bbbbbb", "cc", "d", None], "b": [1, 2, None, 4, 4]})
    result = df.select(
        lit_expr=pl.col("a").str.pad_start(pl.lit(4)),
        int_expr=pl.col("a").str.pad_start(4),
        # "b" supplies a per-row target length taken from column "b".
        b_expr=pl.col("a").str.pad_start("b"),
    )
    expected = pl.DataFrame(
        {
            "lit_expr": ["   a", "bbbbbb", "  cc", "   d", None],
            "int_expr": ["   a", "bbbbbb", "  cc", "   d", None],
            # Row 2 has a null length and row 4 a null string: both give null.
            "b_expr": ["a", "bbbbbb", None, "   d", None],
        }
    )
    assert_frame_equal(result, expected)


def test_str_pad_end_expr() -> None:
    """Check `str.pad_end` with a literal expr, an int, and a column name."""
    df = pl.DataFrame({"a": ["a", "bbbbbb", "cc", "d", None], "b": [1, 2, None, 4, 4]})
    result = df.select(
        lit_expr=pl.col("a").str.pad_end(pl.lit(4)),
        int_expr=pl.col("a").str.pad_end(4),
        # "b" supplies a per-row target length taken from column "b".
        b_expr=pl.col("a").str.pad_end("b"),
    )
    expected = pl.DataFrame(
        {
            "lit_expr": ["a   ", "bbbbbb", "cc  ", "d   ", None],
            "int_expr": ["a   ", "bbbbbb", "cc  ", "d   ", None],
            # Row 2 has a null length and row 4 a null string: both give null.
            "b_expr": ["a", "bbbbbb", None, "d   ", None],
        }
    )
    assert_frame_equal(result, expected)


def test_str_pad_end() -> None:
    """Check `str.pad_end(10)` output and its byte length per row."""
    df = pl.DataFrame({"a": ["foo", "longer_foo", "longest_fooooooo", "hi"]})

    result = df.select(
        pl.col("a").str.pad_end(10).alias("padded"),
        pl.col("a").str.pad_end(10).str.len_bytes().alias("padded_len"),
    )

    expected = pl.DataFrame(
        {
            # Short values gain trailing spaces up to width 10 (7 for "foo",
            # 8 for "hi"); values already >= 10 long are left untouched.
            "padded": ["foo       ", "longer_foo", "longest_fooooooo", "hi        "],
            "padded_len": [10, 10, 16, 10],
        },
        schema_overrides={"padded_len": pl.UInt32},
    )
    assert_frame_equal(result, expected)


def test_str_zfill() -> None:
    """Check `str.zfill(5)` on stringified integers via both expr and Series."""
    df = pl.DataFrame(
        {
            "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
        }
    )
    # Zeros are inserted after a leading "-" sign; values longer than the
    # requested width are returned unchanged and null stays null.
    out = [
        "-0010",
        "-0001",
        "00000",
        "00001",
        "00010",
        "00100",
        "01000",
        "10000",
        "100000",
        "1000000",
        None,
    ]
    assert (
        df.with_columns(pl.col("num").cast(str).str.zfill(5)).to_series().to_list()
        == out
    )
    assert df["num"].cast(str).str.zfill(5).to_list() == out


def test_str_zfill_expr() -> None:
    """Check `str.zfill` when the target length is itself an expression."""
    df = pl.DataFrame(
        {
            "num": ["-10", "-1", "0", "1", "10", None, "1", "+1"],
            # u8 tests the IR length cast
            "len_u8": pl.Series([3, 4, 3, 2, 5, 3, None, 3], dtype=pl.UInt8),
            "len_u64": pl.Series([3, 4, 3, 2, 5, 3, None, 3], dtype=pl.UInt64),
        }
    )
    out = df.select(
        all_expr_u8=pl.col("num").str.zfill(pl.col("len_u8") + 1),
        all_expr=pl.col("num").str.zfill(pl.col("len_u64") + 1),
        str_lit=pl.lit("10").str.zfill(pl.col("len_u64")),
        len_lit=pl.col("num").str.zfill(5),
    )
    expected = pl.DataFrame(
        {
            # A null in either the string or the length column yields null.
            "all_expr_u8": [
                "-010",
                "-0001",
                "0000",
                "001",
                "000010",
                None,
                None,
                "+001",
            ],
            "all_expr": ["-010", "-0001", "0000", "001", "000010", None, None, "+001"],
            "str_lit": ["010", "0010", "010", "10", "00010", "010", None, "010"],
            "len_lit": [
                "-0010",
                "-0001",
                "00000",
                "00001",
                "00010",
                None,
                "00001",
                "+0001",
            ],
        }
    )
    assert_frame_equal(out, expected)


def test_str_zfill_wrong_length() -> None:
    """A length Series of 2 rows against a 3-row column must raise ShapeError."""
    df = pl.DataFrame({"num": ["-10", "-1", "0"]})
    with pytest.raises(ShapeError):
        df.select(pl.col("num").str.zfill(pl.Series([1, 2])))


def test_pad_end_unicode() -> None:
    """Check `str.pad_end` with a multi-byte fill character on non-ASCII input."""
    lf = pl.LazyFrame({"a": ["Café", "345", "東京", None]})

    result = lf.select(pl.col("a").str.pad_end(6, "日"))

    # Every non-null result is 6 characters long, i.e. the expected values
    # count characters rather than UTF-8 bytes.
    expected = pl.LazyFrame({"a": ["Café日日", "345日日日", "東京日日日日", None]})
    assert_frame_equal(result, expected)


def test_pad_start_unicode() -> None:
    """Check `str.pad_start` with a multi-byte fill character on non-ASCII input."""
    lf = pl.LazyFrame({"a": ["Café", "345", "東京", None]})

    result = lf.select(pl.col("a").str.pad_start(6, "日"))

    # Every non-null result is 6 characters long, i.e. the expected values
    # count characters rather than UTF-8 bytes.
    expected = pl.LazyFrame({"a": ["日日Café", "日日日345", "日日日日東京", None]})
    assert_frame_equal(result, expected)


def test_str_zfill_unicode_not_respected() -> None:
    """Pin the current `str.zfill(6)` results for non-ASCII input."""
    lf = pl.LazyFrame({"a": ["Café", "345", "東京", None]})

    result = lf.select(pl.col("a").str.zfill(6))

    # The expected values are consistent with the width being measured in
    # UTF-8 bytes: "Café" (5 bytes) gets one zero, "東京" (6 bytes) gets none.
    expected = pl.LazyFrame({"a": ["0Café", "000345", "東京", None]})
    assert_frame_equal(result, expected)