Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_format.py
6939 views
1
from __future__ import annotations
2
3
import string
4
from decimal import Decimal as D
5
from typing import TYPE_CHECKING, Any
6
7
import pytest
8
9
import polars as pl
10
from polars.exceptions import InvalidOperationError
11
12
if TYPE_CHECKING:
13
from collections.abc import Iterator
14
15
from polars._typing import PolarsDataType
16
17
18
@pytest.fixture(autouse=True)
def _environ() -> Iterator[None]:
    """Run each test inside a ``pl.Config`` context created with defaults restored.

    The fixture is autouse, so it wraps every test in this module without
    being requested explicitly.
    """
    default_config = pl.Config(restore_defaults=True)
    with default_config:
        yield
23
24
25
@pytest.mark.parametrize(
    ("expected", "values"),
    [
        pytest.param(
            "shape: (1,)\n"
            "Series: 'foo' [str]\n"
            "[\n"
            '\t"Somelongstringt…\n'
            "]\n",
            ["Somelongstringto eeat wit me oundaf"],
            id="Long string",
        ),
        pytest.param(
            "shape: (1,)\n"
            "Series: 'foo' [str]\n"
            "[\n"
            '\t"😀😁😂😃😄😅😆😇😈😉😊😋😌😎😏…\n'
            "]\n",
            ["😀😁😂😃😄😅😆😇😈😉😊😋😌😎😏😐😑😒😓"],
            id="Emojis",
        ),
        pytest.param(
            "shape: (1,)\n"
            "Series: 'foo' [str]\n"
            "[\n"
            '\t"yzäöüäöüäöüäö"\n'
            "]\n",
            ["yzäöüäöüäöüäö"],
            id="Characters with accents",
        ),
        pytest.param(
            "shape: (100,)\n"
            "Series: 'foo' [i64]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t2\n"
            "\t3\n"
            "\t4\n"
            # the rows between the head and the tail collapse into one ellipsis row
            "\t…\n"
            "\t95\n"
            "\t96\n"
            "\t97\n"
            "\t98\n"
            "\t99\n"
            "]\n",
            list(range(100)),
            id="Long series",
        ),
    ],
)
def test_fmt_series(
    capfd: pytest.CaptureFixture[str], expected: str, values: list[Any]
) -> None:
    """Compare the printed form of a Series named ``foo`` with ``expected``.

    The Series is printed while ``fmt_str_lengths=15`` is active; the text
    captured from stdout (including the newline added by ``print``) must
    equal ``expected`` exactly.
    """
    series = pl.Series(name="foo", values=values)
    with pl.Config(fmt_str_lengths=15):
        print(series)
    printed = capfd.readouterr().out
    assert printed == expected
88
89
90
def test_fmt_series_string_truncate_default(capfd: pytest.CaptureFixture[str]) -> None:
    """Print a string Series without overriding any string-length setting.

    The 29- and 30-character values are expected in full, while the
    31-character value is expected to be cut after 30 characters and closed
    with an ellipsis instead of a quote.
    """
    alphabet = string.ascii_lowercase
    series = pl.Series(
        name="foo",
        values=[alphabet + digits for digits in ("123", "1234", "12345")],
    )
    print(series)
    printed = capfd.readouterr().out
    assert printed == (
        "shape: (3,)\n"
        "Series: 'foo' [str]\n"
        "[\n"
        '\t"abcdefghijklmnopqrstuvwxyz123"\n'
        '\t"abcdefghijklmnopqrstuvwxyz1234"\n'
        '\t"abcdefghijklmnopqrstuvwxyz1234…\n'
        "]\n"
    )
108
109
110
@pytest.mark.parametrize(
    "dtype", [pl.String, pl.Categorical, pl.Enum(["abc", "abcd", "abcde"])]
)
def test_fmt_series_string_truncate_cat(
    dtype: PolarsDataType, capfd: pytest.CaptureFixture[str]
) -> None:
    """Check truncation of printed values for String, Categorical and Enum Series.

    With ``fmt_str_lengths=4`` the values ``"abc"`` and ``"abcd"`` are
    expected unchanged, and ``"abcde"`` is expected to be cut to four
    characters followed by an ellipsis.
    """
    s = pl.Series(name="foo", values=["abc", "abcd", "abcde"], dtype=dtype)
    with pl.Config(fmt_str_lengths=4):
        print(s)
    out, _ = capfd.readouterr()
    # Only output lines 3..5 are compared; they are stripped of surrounding
    # whitespace first.
    result = [line.strip() for line in out.split("\n")[3:6]]
    expected = ['"abc"', '"abcd"', '"abcd…']
    assert result == expected
124
125
126
@pytest.mark.parametrize(
    ("values", "dtype", "expected"),
    [
        (
            [-127, -1, 0, 1, 127],
            pl.Int8,
            "shape: (5,)\n"
            "Series: 'foo' [i8]\n"
            "[\n"
            "\t-127\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t127\n"
            "]",
        ),
        (
            [-32768, -1, 0, 1, 32767],
            pl.Int16,
            "shape: (5,)\n"
            "Series: 'foo' [i16]\n"
            "[\n"
            "\t-32,768\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t32,767\n"
            "]",
        ),
        (
            [-2147483648, -1, 0, 1, 2147483647],
            pl.Int32,
            "shape: (5,)\n"
            "Series: 'foo' [i32]\n"
            "[\n"
            "\t-2,147,483,648\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t2,147,483,647\n"
            "]",
        ),
        (
            [-9223372036854775808, -1, 0, 1, 9223372036854775807],
            pl.Int64,
            "shape: (5,)\n"
            "Series: 'foo' [i64]\n"
            "[\n"
            "\t-9,223,372,036,854,775,808\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t9,223,372,036,854,775,807\n"
            "]",
        ),
    ],
)
def test_fmt_signed_int_thousands_sep(
    values: list[int], dtype: PolarsDataType, expected: str
) -> None:
    """Check ``str()`` of signed integer Series with ``thousands_separator=True``.

    Each case builds a Series named ``foo`` of the given signed dtype and
    compares its string form with ``expected``, in which digits are grouped
    with commas.
    """
    series = pl.Series(name="foo", values=values, dtype=dtype)
    with pl.Config(thousands_separator=True):
        rendered = str(series)
        assert rendered == expected
189
190
191
@pytest.mark.parametrize(
    ("values", "dtype", "expected"),
    [
        (
            [0, 1, 127],
            pl.UInt8,
            "shape: (3,)\n"
            "Series: 'foo' [u8]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t127\n"
            "]",
        ),
        (
            [0, 1, 32767],
            pl.UInt16,
            "shape: (3,)\n"
            "Series: 'foo' [u16]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t32,767\n"
            "]",
        ),
        (
            [0, 1, 2147483647],
            pl.UInt32,
            "shape: (3,)\n"
            "Series: 'foo' [u32]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t2,147,483,647\n"
            "]",
        ),
        (
            [0, 1, 9223372036854775807],
            pl.UInt64,
            "shape: (3,)\n"
            "Series: 'foo' [u64]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t9,223,372,036,854,775,807\n"
            "]",
        ),
    ],
)
def test_fmt_unsigned_int_thousands_sep(
    values: list[int], dtype: PolarsDataType, expected: str
) -> None:
    """Check ``str()`` of unsigned integer Series with ``thousands_separator=True``.

    Each case builds a Series named ``foo`` of the given unsigned dtype and
    compares its string form with ``expected``, in which digits are grouped
    with commas.
    """
    series = pl.Series(name="foo", values=values, dtype=dtype)
    with pl.Config(thousands_separator=True):
        rendered = str(series)
        assert rendered == expected
246
247
248
def test_fmt_float(capfd: pytest.CaptureFixture[str]) -> None:
    """Print a float Series with no Config overrides and compare the captured text.

    Three values around 8e-05 and one just above 8 are printed; the expected
    output lists them as ``0.00008``, ``0.000079``, ``0.000085`` and
    ``8.00008``.
    """
    floats = [7.966e-05, 7.9e-05, 8.4666e-05, 8.00007966]
    print(pl.Series(name="foo", values=floats))
    printed = capfd.readouterr().out
    assert printed == (
        "shape: (4,)\n"
        "Series: 'foo' [f64]\n"
        "[\n"
        "\t0.00008\n"
        "\t0.000079\n"
        "\t0.000085\n"
        "\t8.00008\n"
        "]\n"
    )
262
263
264
def test_duration_smallest_units() -> None:
    """Check ``str()`` of Duration Series built from ``range(6)`` in us, ms and ns.

    For each time unit the expected text lists the values 0 through 5 with
    that unit's suffix.
    """
    cases = (
        (
            "us",
            "shape: (6,)\nSeries: '' [duration[μs]]\n[\n\t0µs\n\t1µs\n\t2µs\n\t3µs\n\t4µs\n\t5µs\n]",
        ),
        (
            "ms",
            "shape: (6,)\nSeries: '' [duration[ms]]\n[\n\t0ms\n\t1ms\n\t2ms\n\t3ms\n\t4ms\n\t5ms\n]",
        ),
        (
            "ns",
            "shape: (6,)\nSeries: '' [duration[ns]]\n[\n\t0ns\n\t1ns\n\t2ns\n\t3ns\n\t4ns\n\t5ns\n]",
        ),
    )
    for time_unit, rendered in cases:
        s = pl.Series(range(6), dtype=pl.Duration(time_unit))
        assert str(s) == rendered
280
281
282
def test_fmt_float_full() -> None:
    """Check that the ``"full"`` float format applies only inside the Config context.

    Inside the context the Series must render as ``1.230498095872587``; once
    the context has exited, the rendering must differ from that text.
    """
    series = pl.Series([1.2304980958725870923])
    full_repr = "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"

    with pl.Config() as cfg:
        cfg.set_fmt_float("full")
        rendered_full = str(series)
        assert rendered_full == full_repr

    rendered_after = str(series)
    assert rendered_after != full_repr
291
292
293
def test_fmt_list_12188() -> None:
    """Expect a failing i64 -> u8 cast to raise ``InvalidOperationError``.

    The cast runs with ``fmt_table_cell_list_len=1`` and the error message
    must match ``from `i64` to `u8` failed``.
    """
    # Keep max_items (1) below the size of the failed list (4) so that the
    # branch under test is reached.
    with (
        pl.Config(fmt_table_cell_list_len=1),
        pytest.raises(InvalidOperationError, match="from `i64` to `u8` failed"),
    ):
        frame = pl.DataFrame({"x": pl.int_range(250, 260, 1, eager=True)})
        as_u8 = pl.col("x").cast(pl.UInt8)
        frame.with_columns(u8=as_u8)
304
305
306
def test_date_list_fmt() -> None:
    """Check ``str()`` of a ``list[date]`` column produced by a group-by aggregation.

    Date strings are parsed to ``pl.Date``, grouped by ``index`` with the
    input order maintained, and the aggregated ``mydate`` column is compared
    with the expected text.
    """
    raw = pl.DataFrame(
        {
            "mydate": ["2020-01-01", "2020-01-02", "2020-01-05", "2020-01-05"],
            "index": [1, 2, 5, 5],
        }
    )
    parsed = raw.with_columns(pl.col("mydate").str.strptime(pl.Date, "%Y-%m-%d"))
    grouped = parsed.group_by("index", maintain_order=True).agg(pl.col("mydate"))

    expected = (
        "shape: (3,)\n"
        "Series: 'mydate' [list[date]]\n"
        "[\n"
        "\t[2020-01-01]\n"
        "\t[2020-01-02]\n"
        "\t[2020-01-05, 2020-01-05]\n"
        "]"
    )
    assert str(grouped["mydate"]) == expected
325
326
327
def test_fmt_series_cat_list() -> None:
    """Check ``str()`` of a Series cast to ``List(Categorical)``.

    Each inner list is expected to show its categories as quoted strings.
    """
    nested = [
        ["a", "b"],
        ["b", "a"],
        ["b"],
    ]
    s = pl.Series(nested).cast(pl.List(pl.Categorical))

    expected = (
        "shape: (3,)\n"
        "Series: '' [list[cat]]\n"
        "[\n"
        '\t["a", "b"]\n'
        '\t["b", "a"]\n'
        '\t["b"]\n'
        "]"
    )
    assert str(s) == expected
346
347
348
def test_format_numeric_locale_options() -> None:
    """Check DataFrame rendering under different numeric separator settings.

    The same four-column frame (str, f64, i64, decimal) is rendered three
    times:

    1. right-aligned numeric cells, ``","`` as thousands separator and a
       float precision of 3;
    2. ``","`` as decimal separator and ``"."`` as thousands separator;
    3. no overrides (no digit grouping, ``"."`` as decimal separator).
    """
    df = pl.DataFrame(
        {
            "a": ["xx", "yy"],
            "b": [100000.987654321, -234567.89],
            "c": [-11111111, 44444444444],
            "d": [D("12345.6789"), D("-9999999.99")],
        },
        strict=False,
    )

    # note: numeric digit grouping looks much better
    # when right-aligned with fixed float precision
    with pl.Config(
        tbl_cell_numeric_alignment="RIGHT",
        thousands_separator=",",
        float_precision=3,
    ):
        assert (
            str(df)
            == """shape: (2, 4)
┌─────┬──────────────┬────────────────┬─────────────────┐
│ a   ┆            b ┆              c ┆               d │
│ --- ┆          --- ┆            --- ┆             --- │
│ str ┆          f64 ┆            i64 ┆    decimal[*,4] │
╞═════╪══════════════╪════════════════╪═════════════════╡
│ xx  ┆  100,000.988 ┆    -11,111,111 ┆     12,345.6789 │
│ yy  ┆ -234,567.890 ┆ 44,444,444,444 ┆ -9,999,999.9900 │
└─────┴──────────────┴────────────────┴─────────────────┘"""
        )

    # switch digit/decimal separators
    with pl.Config(
        decimal_separator=",",
        thousands_separator=".",
    ):
        assert (
            str(df)
            == """shape: (2, 4)
┌─────┬────────────────┬────────────────┬─────────────────┐
│ a   ┆ b              ┆ c              ┆ d               │
│ --- ┆ ---            ┆ ---            ┆ ---             │
│ str ┆ f64            ┆ i64            ┆ decimal[*,4]    │
╞═════╪════════════════╪════════════════╪═════════════════╡
│ xx  ┆ 100.000,987654 ┆ -11.111.111    ┆ 12.345,6789     │
│ yy  ┆ -234.567,89    ┆ 44.444.444.444 ┆ -9.999.999,9900 │
└─────┴────────────────┴────────────────┴─────────────────┘"""
        )

    # default (no digit grouping, standard digit/decimal separators)
    assert (
        str(df)
        == """shape: (2, 4)
┌─────┬───────────────┬─────────────┬───────────────┐
│ a   ┆ b             ┆ c           ┆ d             │
│ --- ┆ ---           ┆ ---         ┆ ---           │
│ str ┆ f64           ┆ i64         ┆ decimal[*,4]  │
╞═════╪═══════════════╪═════════════╪═══════════════╡
│ xx  ┆ 100000.987654 ┆ -11111111   ┆ 12345.6789    │
│ yy  ┆ -234567.89    ┆ 44444444444 ┆ -9999999.9900 │
└─────┴───────────────┴─────────────┴───────────────┘"""
    )
411
412
413
def test_fmt_decimal_max_scale() -> None:
    """Check ``str()`` of a ``Decimal(precision=38, scale=38)`` Series.

    The single value has 38 fractional digits and is expected to be rendered
    with every digit intact.
    """
    s = pl.Series(
        [D("0.14282911023321884847623576259639164703")],
        dtype=pl.Decimal(precision=38, scale=38),
    )
    expected = (
        "shape: (1,)\n"
        "Series: '' [decimal[38,38]]\n"
        "[\n"
        "\t0.14282911023321884847623576259639164703\n"
        "]"
    )
    assert str(s) == expected
424
425
426
@pytest.mark.parametrize(
    ("lf", "expected"),
    [
        (
            pl.LazyFrame({"a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), d=pl.col("a")),
            'simple π 4/4 ["a", "b", "c", "d"]',
        ),
        (
            pl.LazyFrame({"a_very_very_long_string": [1], "a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), d=pl.col("a")),
            'simple π 5/5 ["a_very_very_long_string", "a", ... 3 other columns]',
        ),
        (
            pl.LazyFrame({"an_even_longer_very_very_long_string": [1], "a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), d=pl.col("a")),
            'simple π 5/5 ["an_even_longer_very_very_long_string", ... 4 other columns]',
        ),
        (
            pl.LazyFrame({"a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), a_very_long_string_at_the_end=pl.col("a")),
            'simple π 4/4 ["a", "b", "c", ... 1 other column]',
        ),
        (
            pl.LazyFrame({"a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(
                a_very_long_string_in_the_middle=pl.col("b"), d=pl.col("a")
            ),
            'simple π 4/4 ["a", "b", ... 2 other columns]',
        ),
    ],
)
def test_simple_project_format(lf: pl.LazyFrame, expected: str) -> None:
    """Check that ``lf.explain()`` contains the expected ``simple π`` summary.

    The cases place long column names at different positions; in the expected
    summaries the columns that are not listed are reported as
    ``... N other column(s)``.
    """
    plan = lf.explain()
    assert expected in plan
476
477
478
@pytest.mark.parametrize(
    ("df", "expected"),
    [
        pytest.param(
            pl.DataFrame({"A": range(4)}),
            "shape: (4, 1)\n"
            "+-----+\n"
            "| A   |\n"
            "+=====+\n"
            "| 0   |\n"
            "| 1   |\n"
            "| ... |\n"
            "| 3   |\n"
            "+-----+",
            id="Ellipsis correctly aligned",
        ),
        pytest.param(
            pl.DataFrame({"A": range(2)}),
            "shape: (2, 1)\n"
            "+---+\n"
            "| A |\n"
            "+===+\n"
            "| 0 |\n"
            "| 1 |\n"
            "+---+",
            id="No ellipsis needed",
        ),
    ],
)
def test_format_ascii_table_truncation(df: pl.DataFrame, expected: str) -> None:
    """Check ASCII table rendering with ``tbl_rows=3`` and hidden column dtypes.

    A four-row frame is expected to show a ``...`` row in place of the
    omitted data, while a two-row frame is expected to be shown in full.
    """
    ascii_config = {
        "tbl_rows": 3,
        "tbl_hide_column_data_types": True,
        "ascii_tables": True,
    }
    with pl.Config(**ascii_config):
        rendered = str(df)
        assert rendered == expected
510
511
512
def test_format_21393() -> None:
    """Check that ``pl.format("{}", ...)`` renders an ``Int128`` literal 1 as ``"1"``."""
    formatted = pl.select(pl.format("{}", pl.lit(1, pl.Int128)))
    assert formatted.item() == "1"
514
515