CoCalc -- test_format.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_format.py
6939 views
1
from __future__ import annotations
2

3
import string
4
from decimal import Decimal as D
5
from typing import TYPE_CHECKING, Any
6

7
import pytest
8

9
import polars as pl
10
from polars.exceptions import InvalidOperationError
11

12
if TYPE_CHECKING:
13
    from collections.abc import Iterator
14

15
    from polars._typing import PolarsDataType
16

17

18
@pytest.fixture(autouse=True)
def _environ() -> Iterator[None]:
    """Run each test inside a ``pl.Config(restore_defaults=True)`` context.

    The fixture is ``autouse``, so it wraps every test in this module; the
    test body executes while the Config context below is still open.
    """
    default_config = pl.Config(restore_defaults=True)
    with default_config:
        yield
23

24

25
@pytest.mark.parametrize(
    ("expected", "values"),
    [
        pytest.param(
            (
                "shape: (1,)\n"
                "Series: 'foo' [str]\n"
                "[\n"
                '\t"Somelongstringt…\n'
                "]\n"
            ),
            ["Somelongstringto eeat wit me oundaf"],
            id="Long string",
        ),
        pytest.param(
            (
                "shape: (1,)\n"
                "Series: 'foo' [str]\n"
                "[\n"
                '\t"😀😁😂😃😄😅😆😇😈😉😊😋😌😎😏…\n'
                "]\n"
            ),
            ["😀😁😂😃😄😅😆😇😈😉😊😋😌😎😏😐😑😒😓"],
            id="Emojis",
        ),
        pytest.param(
            (
                "shape: (1,)\n"
                "Series: 'foo' [str]\n"
                "[\n"
                '\t"yzäöüäöüäöüäö"\n'
                "]\n"
            ),
            ["yzäöüäöüäöüäö"],
            id="Characters with accents",
        ),
        pytest.param(
            (
                "shape: (100,)\n"
                "Series: 'foo' [i64]\n"
                "[\n"
                "\t0\n"
                "\t1\n"
                "\t2\n"
                "\t3\n"
                "\t4\n"
                "\t…\n"
                "\t95\n"
                "\t96\n"
                "\t97\n"
                "\t98\n"
                "\t99\n"
                "]\n"
            ),
            [*range(100)],
            id="Long series",
        ),
    ],
)
def test_fmt_series(
    capfd: pytest.CaptureFixture[str], expected: str, values: list[Any]
) -> None:
    """Print a Series with ``fmt_str_lengths=15`` and compare captured stdout.

    ``print`` appends a newline, which is why every expected string ends
    with one after the closing ``]``.
    """
    series = pl.Series(name="foo", values=values)
    with pl.Config(fmt_str_lengths=15):
        print(series)
    captured = capfd.readouterr()
    assert captured.out == expected
88

89

90
def test_fmt_series_string_truncate_default(capfd: pytest.CaptureFixture[str]) -> None:
    """Check string truncation in a printed Series without a Config override.

    The three values are the lowercase alphabet followed by 3, 4 and 5
    digits; only the longest one is expected to be cut off with an ellipsis.
    """
    alphabet = string.ascii_lowercase
    series = pl.Series(
        name="foo",
        values=[alphabet + digits for digits in ("123", "1234", "12345")],
    )
    print(series)
    captured = capfd.readouterr()
    assert captured.out == (
        "shape: (3,)\n"
        "Series: 'foo' [str]\n"
        "[\n"
        '\t"abcdefghijklmnopqrstuvwxyz123"\n'
        '\t"abcdefghijklmnopqrstuvwxyz1234"\n'
        '\t"abcdefghijklmnopqrstuvwxyz1234…\n'
        "]\n"
    )
108

109

110
@pytest.mark.parametrize(
    "dtype", [pl.String, pl.Categorical, pl.Enum(["abc", "abcd", "abcde"])]
)
def test_fmt_series_string_truncate_cat(
    dtype: PolarsDataType, capfd: pytest.CaptureFixture[str]
) -> None:
    """Check that ``fmt_str_lengths=4`` truncates values for each dtype alike.

    The same three values are rendered as String, Categorical and Enum; only
    the five-character value is expected to be cut off with an ellipsis.
    """
    s = pl.Series(name="foo", values=["abc", "abcd", "abcde"], dtype=dtype)
    with pl.Config(fmt_str_lengths=4):
        print(s)
    out, _ = capfd.readouterr()
    # Output lines 0-2 are the shape line, the Series header and the opening
    # "[" (the header differs per dtype), so only the three value rows that
    # follow are compared.
    rows = [line.strip() for line in out.split("\n")[3:6]]
    assert rows == ['"abc"', '"abcd"', '"abcd…']
124

125

126
@pytest.mark.parametrize(
    ("values", "dtype", "expected"),
    [
        pytest.param(
            [-127, -1, 0, 1, 127],
            pl.Int8,
            (
                "shape: (5,)\n"
                "Series: 'foo' [i8]\n"
                "[\n"
                "\t-127\n"
                "\t-1\n"
                "\t0\n"
                "\t1\n"
                "\t127\n"
                "]"
            ),
        ),
        pytest.param(
            [-32768, -1, 0, 1, 32767],
            pl.Int16,
            (
                "shape: (5,)\n"
                "Series: 'foo' [i16]\n"
                "[\n"
                "\t-32,768\n"
                "\t-1\n"
                "\t0\n"
                "\t1\n"
                "\t32,767\n"
                "]"
            ),
        ),
        pytest.param(
            [-2147483648, -1, 0, 1, 2147483647],
            pl.Int32,
            (
                "shape: (5,)\n"
                "Series: 'foo' [i32]\n"
                "[\n"
                "\t-2,147,483,648\n"
                "\t-1\n"
                "\t0\n"
                "\t1\n"
                "\t2,147,483,647\n"
                "]"
            ),
        ),
        pytest.param(
            [-9223372036854775808, -1, 0, 1, 9223372036854775807],
            pl.Int64,
            (
                "shape: (5,)\n"
                "Series: 'foo' [i64]\n"
                "[\n"
                "\t-9,223,372,036,854,775,808\n"
                "\t-1\n"
                "\t0\n"
                "\t1\n"
                "\t9,223,372,036,854,775,807\n"
                "]"
            ),
        ),
    ],
)
def test_fmt_signed_int_thousands_sep(
    values: list[int], dtype: PolarsDataType, expected: str
) -> None:
    """Check ``str()`` of signed integer Series with ``thousands_separator=True``."""
    series = pl.Series(name="foo", values=values, dtype=dtype)
    # The string must be produced while the Config override is active.
    with pl.Config(thousands_separator=True):
        rendered = str(series)
    assert rendered == expected
189

190

191
@pytest.mark.parametrize(
    ("values", "dtype", "expected"),
    [
        pytest.param(
            [0, 1, 127],
            pl.UInt8,
            (
                "shape: (3,)\n"
                "Series: 'foo' [u8]\n"
                "[\n"
                "\t0\n"
                "\t1\n"
                "\t127\n"
                "]"
            ),
        ),
        pytest.param(
            [0, 1, 32767],
            pl.UInt16,
            (
                "shape: (3,)\n"
                "Series: 'foo' [u16]\n"
                "[\n"
                "\t0\n"
                "\t1\n"
                "\t32,767\n"
                "]"
            ),
        ),
        pytest.param(
            [0, 1, 2147483647],
            pl.UInt32,
            (
                "shape: (3,)\n"
                "Series: 'foo' [u32]\n"
                "[\n"
                "\t0\n"
                "\t1\n"
                "\t2,147,483,647\n"
                "]"
            ),
        ),
        pytest.param(
            [0, 1, 9223372036854775807],
            pl.UInt64,
            (
                "shape: (3,)\n"
                "Series: 'foo' [u64]\n"
                "[\n"
                "\t0\n"
                "\t1\n"
                "\t9,223,372,036,854,775,807\n"
                "]"
            ),
        ),
    ],
)
def test_fmt_unsigned_int_thousands_sep(
    values: list[int], dtype: PolarsDataType, expected: str
) -> None:
    """Check ``str()`` of unsigned integer Series with ``thousands_separator=True``."""
    series = pl.Series(name="foo", values=values, dtype=dtype)
    # The string must be produced while the Config override is active.
    with pl.Config(thousands_separator=True):
        rendered = str(series)
    assert rendered == expected
246

247

248
def test_fmt_float(capfd: pytest.CaptureFixture[str]) -> None:
    """Print a float Series under default settings and compare captured stdout."""
    floats = [7.966e-05, 7.9e-05, 8.4666e-05, 8.00007966]
    print(pl.Series(name="foo", values=floats))
    captured = capfd.readouterr()
    assert captured.out == (
        "shape: (4,)\n"
        "Series: 'foo' [f64]\n"
        "[\n"
        "\t0.00008\n"
        "\t0.000079\n"
        "\t0.000085\n"
        "\t8.00008\n"
        "]\n"
    )
262

263

264
def test_duration_smallest_units() -> None:
    """Check ``str()`` of Duration Series holding 0..5 in us, ms and ns units."""
    # NOTE(review): in the microsecond case the dtype label and the value
    # suffix appear to use different "mu" glyphs; these literals are kept
    # verbatim -- confirm before normalizing either of them.
    cases = [
        (
            "us",
            "shape: (6,)\nSeries: '' [duration[μs]]\n[\n\t0µs\n\t1µs\n\t2µs\n\t3µs\n\t4µs\n\t5µs\n]",
        ),
        (
            "ms",
            "shape: (6,)\nSeries: '' [duration[ms]]\n[\n\t0ms\n\t1ms\n\t2ms\n\t3ms\n\t4ms\n\t5ms\n]",
        ),
        (
            "ns",
            "shape: (6,)\nSeries: '' [duration[ns]]\n[\n\t0ns\n\t1ns\n\t2ns\n\t3ns\n\t4ns\n\t5ns\n]",
        ),
    ]
    for time_unit, expected in cases:
        durations = pl.Series(range(6), dtype=pl.Duration(time_unit))
        assert str(durations) == expected
280

281

282
def test_fmt_float_full() -> None:
    """Check that ``set_fmt_float("full")`` only applies inside its Config block."""
    series = pl.Series([1.2304980958725870923])
    full_repr = "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"

    with pl.Config() as config:
        config.set_fmt_float("full")
        assert str(series) == full_repr

    # Once the context has exited, the rendering must differ from the
    # "full" representation again.
    assert str(series) != full_repr
291

292

293
def test_fmt_list_12188() -> None:
    """Check the error raised by a failing i64 -> u8 cast with a short list limit."""
    # Limit list cells to 1 item, i.e. fewer than the number of values that
    # fail the cast, so that the truncating branch of the formatting is hit.
    with pl.Config(fmt_table_cell_list_len=1):
        frame = pl.DataFrame({"x": pl.int_range(250, 260, 1, eager=True)})
        with pytest.raises(InvalidOperationError, match="from `i64` to `u8` failed"):
            frame.with_columns(u8=pl.col("x").cast(pl.UInt8))
304

305

306
def test_date_list_fmt() -> None:
    """Check ``str()`` of a list-of-dates column produced by a group-by."""
    raw = pl.DataFrame(
        {
            "mydate": ["2020-01-01", "2020-01-02", "2020-01-05", "2020-01-05"],
            "index": [1, 2, 5, 5],
        }
    )
    parsed = raw.with_columns(pl.col("mydate").str.strptime(pl.Date, "%Y-%m-%d"))
    # maintain_order=True keeps the groups in first-seen order: 1, 2, 5.
    grouped = parsed.group_by("index", maintain_order=True).agg(pl.col("mydate"))

    assert str(grouped["mydate"]) == (
        "shape: (3,)\n"
        "Series: 'mydate' [list[date]]\n"
        "[\n"
        "\t[2020-01-01]\n"
        "\t[2020-01-02]\n"
        "\t[2020-01-05, 2020-01-05]\n"
        "]"
    )
325

326

327
def test_fmt_series_cat_list() -> None:
    """Check ``str()`` of a Series of string lists cast to ``List(Categorical)``."""
    nested = [
        ["a", "b"],
        ["b", "a"],
        ["b"],
    ]
    cat_lists = pl.Series(nested).cast(pl.List(pl.Categorical))

    assert str(cat_lists) == (
        "shape: (3,)\n"
        "Series: '' [list[cat]]\n"
        "[\n"
        '\t["a", "b"]\n'
        '\t["b", "a"]\n'
        '\t["b"]\n'
        "]"
    )
346

347

348
def test_format_numeric_locale_options() -> None:
    """Check DataFrame rendering under different numeric separator settings.

    The same frame is rendered three times: with "," digit grouping,
    right-aligned numeric cells and a float precision of 3; with the decimal
    and thousands separators swapped; and finally without any Config
    override.
    """
    df = pl.DataFrame(
        {
            "a": ["xx", "yy"],
            "b": [100000.987654321, -234567.89],
            "c": [-11111111, 44444444444],
            "d": [D("12345.6789"), D("-9999999.99")],
        },
        strict=False,
    )

    # note: numeric digit grouping looks much better
    # when right-aligned with fixed float precision
    with pl.Config(
        tbl_cell_numeric_alignment="RIGHT",
        thousands_separator=",",
        float_precision=3,
    ):
        assert (
            str(df)
            == """shape: (2, 4)
┌─────┬──────────────┬────────────────┬─────────────────┐
│ a   ┆            b ┆              c ┆               d │
│ --- ┆          --- ┆            --- ┆             --- │
│ str ┆          f64 ┆            i64 ┆    decimal[*,4] │
╞═════╪══════════════╪════════════════╪═════════════════╡
│ xx  ┆  100,000.988 ┆    -11,111,111 ┆     12,345.6789 │
│ yy  ┆ -234,567.890 ┆ 44,444,444,444 ┆ -9,999,999.9900 │
└─────┴──────────────┴────────────────┴─────────────────┘"""
        )

    # switch digit/decimal separators
    with pl.Config(
        decimal_separator=",",
        thousands_separator=".",
    ):
        assert (
            str(df)
            == """shape: (2, 4)
┌─────┬────────────────┬────────────────┬─────────────────┐
│ a   ┆ b              ┆ c              ┆ d               │
│ --- ┆ ---            ┆ ---            ┆ ---             │
│ str ┆ f64            ┆ i64            ┆ decimal[*,4]    │
╞═════╪════════════════╪════════════════╪═════════════════╡
│ xx  ┆ 100.000,987654 ┆ -11.111.111    ┆ 12.345,6789     │
│ yy  ┆ -234.567,89    ┆ 44.444.444.444 ┆ -9.999.999,9900 │
└─────┴────────────────┴────────────────┴─────────────────┘"""
        )

    # default (no digit grouping, standard digit/decimal separators)
    assert (
        str(df)
        == """shape: (2, 4)
┌─────┬───────────────┬─────────────┬───────────────┐
│ a   ┆ b             ┆ c           ┆ d             │
│ --- ┆ ---           ┆ ---         ┆ ---           │
│ str ┆ f64           ┆ i64         ┆ decimal[*,4]  │
╞═════╪═══════════════╪═════════════╪═══════════════╡
│ xx  ┆ 100000.987654 ┆ -11111111   ┆ 12345.6789    │
│ yy  ┆ -234567.89    ┆ 44444444444 ┆ -9999999.9900 │
└─────┴───────────────┴─────────────┴───────────────┘"""
    )
411

412

413
def test_fmt_decimal_max_scale() -> None:
    """Check ``str()`` of a ``Decimal(precision=38, scale=38)`` Series."""
    max_scale_dtype = pl.Decimal(precision=38, scale=38)
    series = pl.Series(
        [D("0.14282911023321884847623576259639164703")],
        dtype=max_scale_dtype,
    )
    assert str(series) == (
        "shape: (1,)\n"
        "Series: '' [decimal[38,38]]\n"
        "[\n"
        "\t0.14282911023321884847623576259639164703\n"
        "]"
    )
424

425

426
@pytest.mark.parametrize(
    ("lf", "expected"),
    [
        pytest.param(
            pl.LazyFrame({"a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), d=pl.col("a")),
            'simple π 4/4 ["a", "b", "c", "d"]',
        ),
        pytest.param(
            pl.LazyFrame({"a_very_very_long_string": [1], "a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), d=pl.col("a")),
            'simple π 5/5 ["a_very_very_long_string", "a", ... 3 other columns]',
        ),
        pytest.param(
            pl.LazyFrame({"an_even_longer_very_very_long_string": [1], "a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), d=pl.col("a")),
            'simple π 5/5 ["an_even_longer_very_very_long_string", ... 4 other columns]',
        ),
        pytest.param(
            pl.LazyFrame({"a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(c=pl.col("b"), a_very_long_string_at_the_end=pl.col("a")),
            'simple π 4/4 ["a", "b", "c", ... 1 other column]',
        ),
        pytest.param(
            pl.LazyFrame({"a": [1]})
            .with_columns(b=pl.col("a"))
            .with_columns(a_very_long_string_in_the_middle=pl.col("b"), d=pl.col("a")),
            'simple π 4/4 ["a", "b", ... 2 other columns]',
        ),
    ],
)
def test_simple_project_format(lf: pl.LazyFrame, expected: str) -> None:
    """Check that the explained query plan contains the expected projection line.

    Each case varies the length and position of the column names; the
    expected text shows which names are listed and how many are summarised
    as "other column(s)".
    """
    assert expected in lf.explain()
476

477

478
@pytest.mark.parametrize(
    ("df", "expected"),
    [
        pytest.param(
            pl.DataFrame({"A": range(4)}),
            (
                "shape: (4, 1)\n"
                "+-----+\n"
                "| A   |\n"
                "+=====+\n"
                "| 0   |\n"
                "| 1   |\n"
                "| ... |\n"
                "| 3   |\n"
                "+-----+"
            ),
            id="Ellipsis correctly aligned",
        ),
        pytest.param(
            pl.DataFrame({"A": range(2)}),
            (
                "shape: (2, 1)\n"
                "+---+\n"
                "| A |\n"
                "+===+\n"
                "| 0 |\n"
                "| 1 |\n"
                "+---+"
            ),
            id="No ellipsis needed",
        ),
    ],
)
def test_format_ascii_table_truncation(df: pl.DataFrame, expected: str) -> None:
    """Check ASCII table rendering with ``tbl_rows=3`` and hidden dtypes.

    The four-row frame exceeds the row limit and is expected to show a
    ``...`` row; the two-row frame fits and is expected to show none.
    """
    with pl.Config(tbl_rows=3, tbl_hide_column_data_types=True, ascii_tables=True):
        rendered = str(df)
    assert rendered == expected
510

511

512
def test_format_21393() -> None:
    """Check that ``pl.format`` renders an Int128 literal of 1 as ``"1"``."""
    int128_one = pl.lit(1, pl.Int128)
    formatted = pl.select(pl.format("{}", int128_one)).item()
    assert formatted == "1"
514

515
Product

Resources

Company