Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_display.py
8420 views
1
from __future__ import annotations
2
3
import string
4
from decimal import Decimal as D
5
from typing import TYPE_CHECKING, Any
6
7
import pytest
8
9
import polars as pl
10
from polars.exceptions import InvalidOperationError
11
from polars.testing import assert_frame_equal
12
13
if TYPE_CHECKING:
14
from collections.abc import Iterator
15
16
from polars._typing import PolarsDataType
17
18
19
@pytest.fixture(autouse=True)
def _environ() -> Iterator[None]:
    """Run each test in this module under default ``pl.Config`` settings."""
    default_config = pl.Config(restore_defaults=True)
    with default_config:
        # The test body executes while the Config context is active.
        yield
24
25
26
# Expected outputs are spelled with explicit "\t" / "\n" escapes so that the
# tab in front of every value row is visible and cannot be silently stripped.
# The tab-prefixed layout is the same one spelled out with escapes in
# `test_duration_smallest_units`.
@pytest.mark.parametrize(
    ("expected", "values"),
    [
        pytest.param(
            "shape: (1,)\n"
            "Series: 'foo' [str]\n"
            "[\n"
            '\t"Somelongstringt…\n'
            "]\n",
            ["Somelongstringto eeat wit me oundaf"],
            id="Long string",
        ),
        pytest.param(
            "shape: (1,)\n"
            "Series: 'foo' [str]\n"
            "[\n"
            '\t"😀😁😂😃😄😅😆😇😈😉😊😋😌😎😏…\n'
            "]\n",
            ["😀😁😂😃😄😅😆😇😈😉😊😋😌😎😏😐😑😒😓"],
            id="Emojis",
        ),
        pytest.param(
            "shape: (1,)\n"
            "Series: 'foo' [str]\n"
            "[\n"
            '\t"yzäöüäöüäöüäö"\n'
            "]\n",
            ["yzäöüäöüäöüäö"],
            id="Characters with accents",
        ),
        pytest.param(
            "shape: (100,)\n"
            "Series: 'foo' [i64]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t2\n"
            "\t3\n"
            "\t4\n"
            # NOTE(review): row marking the elided middle of the series;
            # reconstructed - confirm against the actual printed output.
            "\t…\n"
            "\t95\n"
            "\t96\n"
            "\t97\n"
            "\t98\n"
            "\t99\n"
            "]\n",
            [*range(100)],
            id="Long series",
        ),
    ],
)
def test_fmt_series(
    capfd: pytest.CaptureFixture[str], expected: str, values: list[Any]
) -> None:
    """Print a Series with ``fmt_str_lengths=15`` and compare the captured text.

    ``expected`` ends with a newline because the Series is emitted via
    ``print``.
    """
    s = pl.Series(name="foo", values=values)
    with pl.Config(fmt_str_lengths=15):
        print(s)
    out, _err = capfd.readouterr()
    assert out == expected
89
90
91
def test_fmt_series_string_truncate_default(capfd: pytest.CaptureFixture[str]) -> None:
    """Print strings of 29, 30 and 31 characters under the default Config.

    Per the expected text, only the 31-character value is cut (after 30
    characters) and marked with an ellipsis.
    """
    values = [
        string.ascii_lowercase + "123",
        string.ascii_lowercase + "1234",
        string.ascii_lowercase + "12345",
    ]
    s = pl.Series(name="foo", values=values)
    print(s)
    out, _ = capfd.readouterr()
    # Explicit escapes keep the leading tab of each value row visible; the
    # trailing newline comes from `print`.
    expected = (
        "shape: (3,)\n"
        "Series: 'foo' [str]\n"
        "[\n"
        '\t"abcdefghijklmnopqrstuvwxyz123"\n'
        '\t"abcdefghijklmnopqrstuvwxyz1234"\n'
        '\t"abcdefghijklmnopqrstuvwxyz1234…\n'
        "]\n"
    )
    assert out == expected
109
110
111
@pytest.mark.parametrize(
    "dtype", [pl.String, pl.Categorical, pl.Enum(["abc", "abcd", "abcde"])]
)
def test_fmt_series_string_truncate_cat(
    dtype: PolarsDataType, capfd: pytest.CaptureFixture[str]
) -> None:
    """Check that ``fmt_str_lengths=4`` truncates values of each string-like dtype.

    The same three values are printed as String, Categorical and Enum; only
    the five-character value is expected to be cut and marked with an ellipsis.
    """
    s = pl.Series(name="foo", values=["abc", "abcd", "abcde"], dtype=dtype)
    with pl.Config(fmt_str_lengths=4):
        print(s)
    out, _ = capfd.readouterr()
    # Output lines 0-2 are the shape line, the Series header and the opening
    # bracket; lines 3-5 are the three value rows. The loop variable is named
    # `line` so it does not shadow the Series `s`.
    value_rows = [line.strip() for line in out.split("\n")[3:6]]
    assert value_rows == ['"abc"', '"abcd"', '"abcd…']
125
126
127
# Expected text uses explicit "\t" / "\n" escapes so the tab in front of each
# value row stays visible. There is no trailing newline because the test
# compares `str(s)` rather than printed output.
@pytest.mark.parametrize(
    ("values", "dtype", "expected"),
    [
        (
            [-127, -1, 0, 1, 127],
            pl.Int8,
            "shape: (5,)\n"
            "Series: 'foo' [i8]\n"
            "[\n"
            "\t-127\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t127\n"
            "]",
        ),
        (
            [-32768, -1, 0, 1, 32767],
            pl.Int16,
            "shape: (5,)\n"
            "Series: 'foo' [i16]\n"
            "[\n"
            "\t-32,768\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t32,767\n"
            "]",
        ),
        (
            [-2147483648, -1, 0, 1, 2147483647],
            pl.Int32,
            "shape: (5,)\n"
            "Series: 'foo' [i32]\n"
            "[\n"
            "\t-2,147,483,648\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t2,147,483,647\n"
            "]",
        ),
        (
            [-9223372036854775808, -1, 0, 1, 9223372036854775807],
            pl.Int64,
            "shape: (5,)\n"
            "Series: 'foo' [i64]\n"
            "[\n"
            "\t-9,223,372,036,854,775,808\n"
            "\t-1\n"
            "\t0\n"
            "\t1\n"
            "\t9,223,372,036,854,775,807\n"
            "]",
        ),
    ],
)
def test_fmt_signed_int_thousands_sep(
    values: list[int], dtype: PolarsDataType, expected: str
) -> None:
    """Check digit grouping of signed integers with ``thousands_separator=True``.

    Each case renders a Series of the given signed dtype, including the
    extreme values listed in ``values``, and compares ``str(s)`` to the
    comma-grouped expected text.
    """
    s = pl.Series(name="foo", values=values, dtype=dtype)
    with pl.Config(thousands_separator=True):
        assert str(s) == expected
190
191
192
# Expected text uses explicit "\t" / "\n" escapes so the tab in front of each
# value row stays visible. There is no trailing newline because the test
# compares `str(s)` rather than printed output.
@pytest.mark.parametrize(
    ("values", "dtype", "expected"),
    [
        (
            [0, 1, 127],
            pl.UInt8,
            "shape: (3,)\n"
            "Series: 'foo' [u8]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t127\n"
            "]",
        ),
        (
            [0, 1, 32767],
            pl.UInt16,
            "shape: (3,)\n"
            "Series: 'foo' [u16]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t32,767\n"
            "]",
        ),
        (
            [0, 1, 2147483647],
            pl.UInt32,
            "shape: (3,)\n"
            "Series: 'foo' [u32]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t2,147,483,647\n"
            "]",
        ),
        (
            [0, 1, 9223372036854775807],
            pl.UInt64,
            "shape: (3,)\n"
            "Series: 'foo' [u64]\n"
            "[\n"
            "\t0\n"
            "\t1\n"
            "\t9,223,372,036,854,775,807\n"
            "]",
        ),
    ],
)
def test_fmt_unsigned_int_thousands_sep(
    values: list[int], dtype: PolarsDataType, expected: str
) -> None:
    """Check digit grouping of unsigned integers with ``thousands_separator=True``.

    Each case renders a Series of the given unsigned dtype and compares
    ``str(s)`` to the comma-grouped expected text.
    """
    s = pl.Series(name="foo", values=values, dtype=dtype)
    with pl.Config(thousands_separator=True):
        assert str(s) == expected
247
248
249
def test_fmt_float(capfd: pytest.CaptureFixture[str]) -> None:
    """Print a small f64 Series under the default Config and compare the text."""
    s = pl.Series(name="foo", values=[7.966e-05, 7.9e-05, 8.4666e-05, 8.00007966])
    print(s)
    out, _err = capfd.readouterr()
    # Explicit escapes keep the leading tab of each value row visible; the
    # trailing newline comes from `print`.
    expected = (
        "shape: (4,)\n"
        "Series: 'foo' [f64]\n"
        "[\n"
        "\t0.00008\n"
        "\t0.000079\n"
        "\t0.000085\n"
        "\t8.00008\n"
        "]\n"
    )
    assert out == expected
263
264
265
def test_duration_smallest_units() -> None:
    """Render Duration Series for the "us", "ms" and "ns" time units.

    Each Series holds the values 0-5 and must print them with the unit suffix
    attached directly to the number.
    """
    # (time unit given to pl.Duration, unit text inside the dtype label,
    #  unit text appended to each value). For microseconds the dtype label and
    # the value suffix are written with different characters in the expected
    # text; both are kept exactly as they are.
    cases = (
        ("us", "μs", "µs"),
        ("ms", "ms", "ms"),
        ("ns", "ns", "ns"),
    )
    for time_unit, dtype_label, value_suffix in cases:
        series = pl.Series(range(6), dtype=pl.Duration(time_unit))
        value_rows = "".join(f"\t{i}{value_suffix}\n" for i in range(6))
        expected = (
            f"shape: (6,)\nSeries: '' [duration[{dtype_label}]]\n[\n{value_rows}]"
        )
        assert str(series) == expected
281
282
283
def test_fmt_float_full() -> None:
    """Check that ``set_fmt_float("full")`` only applies inside its Config block."""
    series = pl.Series([1.2304980958725870923])
    full_repr = "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"

    with pl.Config() as config:
        config.set_fmt_float("full")
        rendered_full = str(series)
        assert rendered_full == full_repr

    # Outside the Config block the rendering must differ from the "full" form.
    rendered_default = str(series)
    assert rendered_default != full_repr
292
293
294
def test_fmt_list_12188() -> None:
    """Cast values 250-259 to UInt8 and expect an ``InvalidOperationError``."""
    # Limit list cells to a single item, fewer than the 4 values that fail the
    # cast, so that the branch under test is reached.
    with (
        pl.Config(fmt_table_cell_list_len=1),
        pytest.raises(InvalidOperationError, match="from `i64` to `u8` failed"),
    ):
        frame = pl.DataFrame({"x": pl.int_range(250, 260, 1, eager=True)})
        frame.with_columns(u8=pl.col("x").cast(pl.UInt8))
305
306
307
def test_date_list_fmt() -> None:
    """Render a ``list[date]`` Series produced by a group-by aggregation."""
    df = pl.DataFrame(
        {
            "mydate": ["2020-01-01", "2020-01-02", "2020-01-05", "2020-01-05"],
            "index": [1, 2, 5, 5],
        }
    )

    df = df.with_columns(pl.col("mydate").str.strptime(pl.Date, "%Y-%m-%d"))
    # `maintain_order=True` keeps the groups in first-seen order (1, 2, 5), so
    # the rendered rows are deterministic.
    grouped = df.group_by("index", maintain_order=True).agg(pl.col("mydate"))

    # Explicit escapes keep the leading tab of each value row visible.
    expected = (
        "shape: (3,)\n"
        "Series: 'mydate' [list[date]]\n"
        "[\n"
        "\t[2020-01-01]\n"
        "\t[2020-01-02]\n"
        "\t[2020-01-05, 2020-01-05]\n"
        "]"
    )
    assert str(grouped["mydate"]) == expected
326
327
328
def test_fmt_series_cat_list() -> None:
    """Render a Series of string lists after casting to ``List(Categorical)``."""
    s = pl.Series(
        [
            ["a", "b"],
            ["b", "a"],
            ["b"],
        ],
    ).cast(pl.List(pl.Categorical))

    # Explicit escapes keep the leading tab of each value row visible.
    expected = (
        "shape: (3,)\n"
        "Series: '' [list[cat]]\n"
        "[\n"
        '\t["a", "b"]\n'
        '\t["b", "a"]\n'
        '\t["b"]\n'
        "]"
    )
    assert str(s) == expected
347
348
349
def test_format_numeric_locale_options() -> None:
    """Render one frame under three numeric-formatting configurations.

    Covers (1) right-aligned numerics with "," digit grouping and fixed float
    precision, (2) swapped decimal/thousands separators, and (3) the default
    Config with no digit grouping.
    """
    # NOTE(review): the padding inside every table cell below was rebuilt from
    # the border widths. Left alignment is assumed where no alignment option is
    # set, and in the first table headers and decimals are assumed to follow
    # the RIGHT numeric alignment - confirm against the actual output.
    df = pl.DataFrame(
        {
            "a": ["xx", "yy"],
            "b": [100000.987654321, -234567.89],
            "c": [-11111111, 44444444444],
            "d": [D("12345.6789"), D("-9999999.99")],
        },
        strict=False,
    )

    # note: numeric digit grouping looks much better
    # when right-aligned with fixed float precision
    expected_grouped = """shape: (2, 4)
┌─────┬──────────────┬────────────────┬─────────────────┐
│ a   ┆            b ┆              c ┆               d │
│ --- ┆          --- ┆            --- ┆             --- │
│ str ┆          f64 ┆            i64 ┆   decimal[38,4] │
╞═════╪══════════════╪════════════════╪═════════════════╡
│ xx  ┆  100,000.988 ┆    -11,111,111 ┆     12,345.6789 │
│ yy  ┆ -234,567.890 ┆ 44,444,444,444 ┆ -9,999,999.9900 │
└─────┴──────────────┴────────────────┴─────────────────┘"""
    with pl.Config(
        tbl_cell_numeric_alignment="RIGHT",
        thousands_separator=",",
        float_precision=3,
    ):
        assert str(df) == expected_grouped

    # switch digit/decimal separators
    expected_swapped = """shape: (2, 4)
┌─────┬────────────────┬────────────────┬─────────────────┐
│ a   ┆ b              ┆ c              ┆ d               │
│ --- ┆ ---            ┆ ---            ┆ ---             │
│ str ┆ f64            ┆ i64            ┆ decimal[38,4]   │
╞═════╪════════════════╪════════════════╪═════════════════╡
│ xx  ┆ 100.000,987654 ┆ -11.111.111    ┆ 12.345,6789     │
│ yy  ┆ -234.567,89    ┆ 44.444.444.444 ┆ -9.999.999,9900 │
└─────┴────────────────┴────────────────┴─────────────────┘"""
    with pl.Config(
        decimal_separator=",",
        thousands_separator=".",
    ):
        assert str(df) == expected_swapped

    # default (no digit grouping, standard digit/decimal separators)
    expected_default = """shape: (2, 4)
┌─────┬───────────────┬─────────────┬───────────────┐
│ a   ┆ b             ┆ c           ┆ d             │
│ --- ┆ ---           ┆ ---         ┆ ---           │
│ str ┆ f64           ┆ i64         ┆ decimal[38,4] │
╞═════╪═══════════════╪═════════════╪═══════════════╡
│ xx  ┆ 100000.987654 ┆ -11111111   ┆ 12345.6789    │
│ yy  ┆ -234567.89    ┆ 44444444444 ┆ -9999999.9900 │
└─────┴───────────────┴─────────────┴───────────────┘"""
    assert str(df) == expected_default
411
412
413
def test_fmt_decimal_max_scale() -> None:
    """Render a ``Decimal(precision=38, scale=38)`` value with all 38 digits."""
    values = [D("0.14282911023321884847623576259639164703")]
    dtype = pl.Decimal(precision=38, scale=38)
    s = pl.Series(values, dtype=dtype)
    result = str(s)
    # Explicit escapes keep the leading tab of the value row visible.
    expected = (
        "shape: (1,)\n"
        "Series: '' [decimal[38,38]]\n"
        "[\n"
        "\t0.14282911023321884847623576259639164703\n"
        "]"
    )
    assert result == expected
424
425
426
def _two_step_projection(
    data: dict[str, list[int]], **second_step: pl.Expr
) -> pl.LazyFrame:
    """Build a LazyFrame, add ``b`` as a copy of ``a``, then add ``second_step``."""
    return pl.LazyFrame(data).with_columns(b=pl.col("a")).with_columns(**second_step)


@pytest.mark.parametrize(
    ("lf", "expected"),
    [
        (
            _two_step_projection({"a": [1]}, c=pl.col("b"), d=pl.col("a")),
            'simple π 4/4 ["a", "b", "c", "d"]',
        ),
        (
            _two_step_projection(
                {"a_very_very_long_string": [1], "a": [1]},
                c=pl.col("b"),
                d=pl.col("a"),
            ),
            'simple π 5/5 ["a_very_very_long_string", "a", ... 3 other columns]',
        ),
        (
            _two_step_projection(
                {"an_even_longer_very_very_long_string": [1], "a": [1]},
                c=pl.col("b"),
                d=pl.col("a"),
            ),
            'simple π 5/5 ["an_even_longer_very_very_long_string", ... 4 other columns]',
        ),
        (
            _two_step_projection(
                {"a": [1]},
                c=pl.col("b"),
                a_very_long_string_at_the_end=pl.col("a"),
            ),
            'simple π 4/4 ["a", "b", "c", ... 1 other column]',
        ),
        (
            _two_step_projection(
                {"a": [1]},
                a_very_long_string_in_the_middle=pl.col("b"),
                d=pl.col("a"),
            ),
            'simple π 4/4 ["a", "b", ... 2 other columns]',
        ),
    ],
)
def test_simple_project_format(lf: pl.LazyFrame, expected: str) -> None:
    """Check that the query plan text contains the expected projection line."""
    plan = lf.explain()
    assert expected in plan
476
477
478
# NOTE(review): the cell padding in the first expected table was rebuilt from
# its border width, assuming left-aligned cells - confirm against the actual
# output.
@pytest.mark.parametrize(
    ("df", "expected"),
    [
        pytest.param(
            pl.DataFrame({"A": range(4)}),
            """shape: (4, 1)
+-----+
| A   |
+=====+
| 0   |
| 1   |
| ... |
| 3   |
+-----+""",
            id="Ellipsis correctly aligned",
        ),
        pytest.param(
            pl.DataFrame({"A": range(2)}),
            """shape: (2, 1)
+---+
| A |
+===+
| 0 |
| 1 |
+---+""",
            id="No ellipsis needed",
        ),
    ],
)
def test_format_ascii_table_truncation(df: pl.DataFrame, expected: str) -> None:
    """Render ASCII tables limited to three rows, with column dtypes hidden.

    The four-row frame is expected to show a ``...`` row in place of the
    elided data; the two-row frame fits within the limit and shows none.
    """
    with pl.Config(tbl_rows=3, tbl_hide_column_data_types=True, ascii_tables=True):
        assert str(df) == expected
510
511
512
def test_format_21393() -> None:
    """Format an ``Int128`` literal through ``pl.format`` and expect ``"1"``."""
    int128_one = pl.lit(1, pl.Int128)
    formatted = pl.select(pl.format("{}", int128_one))
    assert formatted.item() == "1"
514
515
516
def test_format_25625() -> None:
    """Check ``pl.format`` when called with a template and no expression arguments.

    The repr of the empty-template expression must start with the
    ``str.format()`` marker. Selecting each template from an empty frame must
    give a ``literal`` column holding the template text itself.
    """
    empty_format_repr = repr(pl.format(""))
    assert empty_format_repr.startswith("<Expr ['str.format()'] at"), empty_format_repr

    # In every case the expected cell value is the template itself.
    for template in ("", "x A + ", "x A + {{and here }}"):
        assert_frame_equal(
            pl.DataFrame({}).select(pl.format(template)),
            pl.DataFrame({"literal": template}),
        )
531
532