CoCalc -- test

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/io/test_other.py
⁶⁹³⁹ views
1
from __future__ import annotations
2

3
import copy
4
import sys
5
from pathlib import Path
6
from typing import Any, Callable, cast
7

8
import pytest
9

10
import polars as pl
11
from polars.testing import assert_frame_equal, assert_series_equal
12

13

14
@pytest.mark.parametrize(
    "read_function",
    [
        pl.read_csv,
        pl.read_ipc,
        pl.read_json,
        pl.read_parquet,
        pl.read_avro,
        pl.scan_csv,
        pl.scan_ipc,
        pl.scan_parquet,
    ],
)
def test_read_missing_file(read_function: Callable[[Any], pl.DataFrame]) -> None:
    """Each parametrized reader raises ``FileNotFoundError`` for a missing path."""
    # The "(os error 2): <path>" tail is asserted on every platform. The text
    # in front of it may differ per platform, so it is only checked on Linux.
    os_error_tail = "\\(os error 2\\): fake_file_path"
    if sys.platform == "linux":
        expected_message = "No such file or directory " + os_error_tail
    else:
        expected_message = os_error_tail

    needs_collect = "scan" in read_function.__name__
    with pytest.raises(FileNotFoundError, match=expected_message):
        result = read_function("fake_file_path")
        if needs_collect:
            # The scan_* variants are additionally collected inside the
            # same ``raises`` context.
            result.collect()  # type: ignore[attr-defined]
39

40

41
@pytest.mark.parametrize(
    "write_method_name",
    [
        # "write_excel" is deliberately left out: it already raises a
        # FileCreateError from the underlying library dependency
        "write_csv",
        "write_ipc",
        "write_ipc_stream",
        "write_json",
        "write_ndjson",
        "write_parquet",
        "write_avro",
    ],
)
def test_write_missing_directory(write_method_name: str) -> None:
    """Writing to a non-existent directory raises ``FileNotFoundError``."""
    missing_target = Path("non", "existing", "path")
    # The test would be meaningless if the path happened to exist, so bail
    # out loudly instead of producing a misleading result.
    if missing_target.exists():
        pytest.fail(
            "Testing on a non existing path failed because the path does exist."
        )

    frame = pl.DataFrame({"a": [1]})
    writer = getattr(frame, write_method_name)
    with pytest.raises(FileNotFoundError):
        writer(missing_target)
66

67

68
def test_read_missing_file_path_truncated() -> None:
    """The error for a long missing path shows only its tail plus a hint."""
    # 25 numbers glued together with a filler word give a long, unique path.
    long_path = "lskdfj".join(map(str, range(25)))

    # Expected: an ellipsis, the last part of the path, then the verbose hint.
    truncated_message = "\\.\\.\\.lskdfj14lskdfj15lskdfj16lskdfj17lskdfj18lskdfj19lskdfj20lskdfj21lskdfj22lskdfj23lskdfj24 \\(set POLARS_VERBOSE=1 to see full path\\)"
    with pytest.raises(FileNotFoundError, match=truncated_message):
        pl.read_csv(long_path)
76

77

78
def test_read_missing_file_path_expanded_when_polars_verbose_enabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With ``POLARS_VERBOSE=1`` the error contains the complete missing path."""
    monkeypatch.setenv("POLARS_VERBOSE", "1")

    # The path contains no regex metacharacters, so it doubles as the pattern.
    long_path = "lskdfj".join(map(str, range(25)))
    with pytest.raises(FileNotFoundError, match=long_path):
        pl.read_csv(long_path)
90

91

92
def test_copy() -> None:
    """Shallow and deep copies of a frame and a series equal the originals."""
    frame = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
    series = pl.Series("a", [1, 2])
    duplicators = (copy.copy, copy.deepcopy)

    for duplicate in duplicators:
        assert_frame_equal(duplicate(frame), frame)

    for duplicate in duplicators:
        assert_series_equal(duplicate(series), series)
100

101

102
def test_categorical_round_trip() -> None:
    """A categorical column keeps its dtype through an Arrow round trip."""
    source = pl.DataFrame({"ints": [1, 2, 3], "cat": ["a", "b", "c"]}).with_columns(
        pl.col("cat").cast(pl.Categorical)
    )

    # On the Arrow side the categorical column must show up as a dictionary type.
    arrow_table = source.to_arrow()
    cat_type_repr = str(arrow_table["cat"].type)
    assert "dictionary" in cat_type_repr

    restored = cast(pl.DataFrame, pl.from_arrow(arrow_table))
    assert restored.dtypes == [pl.Int64, pl.Categorical]
111

112

113
def test_from_different_chunks() -> None:
    """Columns built from differently sized pieces convert without panicking."""
    col_a = pl.Series("a", [1, 2, 3, 4, None])
    col_b = pl.Series("b", [1, 2])
    # The return value of append is discarded: col_b itself has to end up
    # 5 values long for the shape assertion at the end to hold.
    col_b.append(pl.Series("b", [1, 2, 3]))

    # check we don't panic
    pl.DataFrame([col_a, col_b]).to_arrow()

    pandas_frame = pl.DataFrame([col_a, col_b]).to_pandas()
    assert list(pandas_frame.columns) == ["a", "b"]
    assert pandas_frame.shape == (5, 2)
126

127

128
def test_unit_io_subdir_has_no_init() -> None:
    # --------------------------------------------------------------------------------
    # A failure here means somebody added an '__init__.py' to 'tests/unit/io'.
    # Background: https://github.com/pola-rs/polars/pull/6889
    # --------------------------------------------------------------------------------
    # Short version: such a file can mask the builtin 'io' module, which
    # causes a fatal python error.
    # --------------------------------------------------------------------------------
    io_tests_dir = Path(__file__).parent

    # Sanity check that this file still lives where the test assumes it does.
    trailing_parts = io_tests_dir.parts[-2:]
    assert trailing_parts == ("unit", "io")

    init_file = io_tests_dir / "__init__.py"
    assert not init_file.exists(), (
        "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory"
    )
140

141

142
@pytest.mark.write_disk
@pytest.mark.parametrize(
    ("scan_funcs", "write_func"),
    [
        ([pl.scan_parquet, pl.read_parquet], pl.DataFrame.write_parquet),
        ([pl.scan_csv, pl.read_csv], pl.DataFrame.write_csv),
    ],
)
@pytest.mark.parametrize("char", ["[", "*"])
def test_no_glob(
    scan_funcs: list[Callable[[Any], pl.LazyFrame | pl.DataFrame]],
    write_func: Callable[[pl.DataFrame, Path], None],
    char: str,
    tmp_path: Path,
) -> None:
    """With ``glob=False`` a file named after a glob character is read as-is."""
    if (sys.platform, char) == ("win32", "*"):
        pytest.skip("unsupported glob char for windows")

    tmp_path.mkdir(exist_ok=True)

    expected = pl.DataFrame({"x": 1})

    # Two files are written: one named exactly like the glob character and a
    # sibling with a "1" suffix (presumably so that a pattern expansion would
    # behave differently from a literal read).
    literal_path = tmp_path / char
    sibling_path = tmp_path / f"{char}1"
    for target in (literal_path, sibling_path):
        write_func(expected, target)

    for func in scan_funcs:
        loaded = func(literal_path, glob=False)  # type: ignore[call-arg]
        # .lazy().collect() normalises eager and lazy results to a DataFrame.
        assert_frame_equal(loaded.lazy().collect(), expected)
171

172
Product

Resources

Company