Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/string/test_pad.py
6940 views
1
from __future__ import annotations
2
3
import pytest
4
5
import polars as pl
6
from polars.exceptions import ShapeError
7
from polars.testing import assert_frame_equal
8
9
10
def test_str_pad_start() -> None:
    """Check that `str.pad_start` left-pads short strings up to width 10.

    Values that are already 10 or more bytes long are expected back unchanged,
    which is why the "padded_len" column reads 10, 10, 16, 10.
    """
    df = pl.DataFrame({"a": ["foo", "longer_foo", "longest_fooooooo", "hi"]})

    result = df.select(
        pl.col("a").str.pad_start(10).alias("padded"),
        pl.col("a").str.pad_start(10).str.len_bytes().alias("padded_len"),
    )

    expected = pl.DataFrame(
        {
            # The padded literals must be exactly 10 characters wide so they
            # agree with the lengths asserted in "padded_len" below.
            "padded": ["       foo", "longer_foo", "longest_fooooooo", "        hi"],
            "padded_len": [10, 10, 16, 10],
        },
        schema_overrides={"padded_len": pl.UInt32},
    )
    assert_frame_equal(result, expected)
26
27
28
def test_str_pad_start_expr() -> None:
    """Check `str.pad_start` with the width given as a literal, an int or a column."""
    df = pl.DataFrame({"a": ["a", "bbbbbb", "cc", "d", None], "b": [1, 2, None, 4, 4]})
    result = df.select(
        lit_expr=pl.col("a").str.pad_start(pl.lit(4)),
        int_expr=pl.col("a").str.pad_start(4),
        b_expr=pl.col("a").str.pad_start("b"),
    )
    expected = pl.DataFrame(
        {
            # Width 4: every shorter string is left-padded to 4 characters.
            "lit_expr": ["   a", "bbbbbb", "  cc", "   d", None],
            "int_expr": ["   a", "bbbbbb", "  cc", "   d", None],
            # Row-wise widths taken from column "b" (1, 2, null, 4, 4): a width
            # not exceeding the string length leaves it unchanged, and a null
            # width or a null string is expected to give null.
            "b_expr": ["a", "bbbbbb", None, "   d", None],
        }
    )
    assert_frame_equal(result, expected)
43
44
45
def test_str_pad_end_expr() -> None:
    """Check `str.pad_end` with the width given as a literal, an int or a column."""
    df = pl.DataFrame({"a": ["a", "bbbbbb", "cc", "d", None], "b": [1, 2, None, 4, 4]})
    result = df.select(
        lit_expr=pl.col("a").str.pad_end(pl.lit(4)),
        int_expr=pl.col("a").str.pad_end(4),
        b_expr=pl.col("a").str.pad_end("b"),
    )
    expected = pl.DataFrame(
        {
            # Width 4: every shorter string is right-padded to 4 characters.
            "lit_expr": ["a   ", "bbbbbb", "cc  ", "d   ", None],
            "int_expr": ["a   ", "bbbbbb", "cc  ", "d   ", None],
            # Row-wise widths taken from column "b" (1, 2, null, 4, 4): a width
            # not exceeding the string length leaves it unchanged, and a null
            # width or a null string is expected to give null.
            "b_expr": ["a", "bbbbbb", None, "d   ", None],
        }
    )
    assert_frame_equal(result, expected)
60
61
62
def test_str_pad_end() -> None:
    """Check that `str.pad_end` right-pads short strings up to width 10.

    Values that are already 10 or more bytes long are expected back unchanged,
    which is why the "padded_len" column reads 10, 10, 16, 10.
    """
    df = pl.DataFrame({"a": ["foo", "longer_foo", "longest_fooooooo", "hi"]})

    result = df.select(
        pl.col("a").str.pad_end(10).alias("padded"),
        pl.col("a").str.pad_end(10).str.len_bytes().alias("padded_len"),
    )

    expected = pl.DataFrame(
        {
            # The padded literals must be exactly 10 characters wide so they
            # agree with the lengths asserted in "padded_len" below.
            "padded": ["foo       ", "longer_foo", "longest_fooooooo", "hi        "],
            "padded_len": [10, 10, 16, 10],
        },
        schema_overrides={"padded_len": pl.UInt32},
    )
    assert_frame_equal(result, expected)
78
79
80
def test_str_zfill() -> None:
    """Zero-fill stringified integers to width 5 via the expression and Series APIs."""
    numbers = [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None]
    frame = pl.DataFrame({"num": numbers})

    # A leading minus sign stays in front of the inserted zeros; values that
    # are already wider than 5 characters are expected back untouched.
    want = [
        "-0010",
        "-0001",
        "00000",
        "00001",
        "00010",
        "00100",
        "01000",
        "10000",
        "100000",
        "1000000",
        None,
    ]

    # Expression path.
    via_expr = frame.with_columns(pl.col("num").cast(str).str.zfill(5))
    assert via_expr.to_series().to_list() == want

    # Series path.
    via_series = frame["num"].cast(str).str.zfill(5)
    assert via_series.to_list() == want
104
105
106
def test_str_zfill_expr() -> None:
    """Zero-fill with the target length supplied as an expression or a literal."""
    lengths = [3, 4, 3, 2, 5, 3, None, 3]
    frame = pl.DataFrame(
        {
            "num": ["-10", "-1", "0", "1", "10", None, "1", "+1"],
            # u8 tests the IR length cast
            "len_u8": pl.Series(lengths, dtype=pl.UInt8),
            "len_u64": pl.Series(lengths, dtype=pl.UInt64),
        }
    )

    num = pl.col("num")
    projections = {
        "all_expr_u8": num.str.zfill(pl.col("len_u8") + 1),
        "all_expr": num.str.zfill(pl.col("len_u64") + 1),
        "str_lit": pl.lit("10").str.zfill(pl.col("len_u64")),
        "len_lit": num.str.zfill(5),
    }
    actual = frame.select(**projections)

    # Both integer widths must give the same answer; a null string or a null
    # length is expected to produce null.
    per_row_fill = ["-010", "-0001", "0000", "001", "000010", None, None, "+001"]
    wanted = pl.DataFrame(
        {
            "all_expr_u8": per_row_fill,
            "all_expr": per_row_fill,
            "str_lit": ["010", "0010", "010", "10", "00010", "010", None, "010"],
            "len_lit": [
                "-0010",
                "-0001",
                "00000",
                "00001",
                "00010",
                None,
                "00001",
                "+0001",
            ],
        }
    )
    assert_frame_equal(actual, wanted)
148
149
150
def test_str_zfill_wrong_length() -> None:
    """A length Series whose size differs from the column must raise ShapeError."""
    frame = pl.DataFrame({"num": ["-10", "-1", "0"]})
    # Two lengths for a three-row column.
    mismatched = pl.Series([1, 2])
    with pytest.raises(ShapeError):
        frame.select(pl.col("num").str.zfill(mismatched))
154
155
156
def test_pad_end_unicode() -> None:
    """Right-pad to 6 characters with a multi-byte fill character."""
    source = pl.LazyFrame({"a": ["Café", "345", "東京", None]})
    padded = source.select(pl.col("a").str.pad_end(6, "日"))

    # The expected widths are counted in characters: "Café" gets two fill
    # characters even though it occupies five bytes.
    wanted = pl.LazyFrame({"a": ["Café日日", "345日日日", "東京日日日日", None]})
    assert_frame_equal(padded, wanted)
163
164
165
def test_pad_start_unicode() -> None:
    """Left-pad to 6 characters with a multi-byte fill character."""
    source = pl.LazyFrame({"a": ["Café", "345", "東京", None]})
    padded = source.select(pl.col("a").str.pad_start(6, "日"))

    # The expected widths are counted in characters: "Café" gets two fill
    # characters even though it occupies five bytes.
    wanted = pl.LazyFrame({"a": ["日日Café", "日日日345", "日日日日東京", None]})
    assert_frame_equal(padded, wanted)
172
173
174
def test_str_zfill_unicode_not_respected() -> None:
    """Pin `str.zfill(6)` results for non-ASCII input."""
    source = pl.LazyFrame({"a": ["Café", "345", "東京", None]})
    filled = source.select(pl.col("a").str.zfill(6))

    # The expected output matches a width measured in bytes: "Café" (5 bytes)
    # gains a single zero and "東京" (6 bytes) gains none.
    wanted = pl.LazyFrame({"a": ["0Café", "000345", "東京", None]})
    assert_frame_equal(filled, wanted)
181
182