Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/io/test_other.py
8427 views
1
from __future__ import annotations
2
3
import copy
4
import sys
5
from pathlib import Path
6
from typing import TYPE_CHECKING, Any, cast
7
8
import pytest
9
10
import polars as pl
11
from polars.testing import assert_frame_equal, assert_series_equal
12
13
if TYPE_CHECKING:
14
from collections.abc import Callable
15
16
from tests.conftest import PlMonkeyPatch
17
18
19
@pytest.mark.parametrize(
    "read_function",
    [
        pl.read_csv,
        pl.read_ipc,
        pl.read_json,
        pl.read_parquet,
        pl.read_avro,
        pl.scan_csv,
        pl.scan_ipc,
        pl.scan_parquet,
    ],
)
def test_read_missing_file(read_function: Callable[[Any], pl.DataFrame]) -> None:
    """Check that every reader/scanner raises FileNotFoundError for a missing path."""
    # The message associated with OS error 2 may differ per platform, so the
    # human-readable prefix is only pinned on Linux.
    prefix = "No such file or directory " if sys.platform == "linux" else ""
    expected_message = prefix + "\\(os error 2\\): fake_file_path"

    is_scan = "scan" in read_function.__name__

    with pytest.raises(FileNotFoundError, match=expected_message):
        result = read_function("fake_file_path")
        if is_scan:
            # The scan_* variants are additionally collected.
            result.collect()  # type: ignore[attr-defined]
44
45
46
@pytest.mark.parametrize(
    "write_method_name",
    [
        # "write_excel" not included
        # because it already raises a FileCreateError
        # from the underlying library dependency
        "write_csv",
        "write_ipc",
        "write_ipc_stream",
        "write_json",
        "write_ndjson",
        "write_parquet",
        "write_avro",
    ],
)
def test_write_missing_directory(write_method_name: str) -> None:
    """Check that each writer raises FileNotFoundError for a missing directory."""
    target = Path("non", "existing", "path")
    # Fail loudly if the precondition (the path must be absent) does not hold.
    if target.exists():
        pytest.fail(
            "Testing on a non existing path failed because the path does exist."
        )

    frame = pl.DataFrame({"a": [1]})
    writer = getattr(frame, write_method_name)
    with pytest.raises(FileNotFoundError):
        writer(target)
71
72
73
def test_read_missing_file_path_truncated() -> None:
    """Check that a long missing path is shown truncated in the error message."""
    # Build a long path-like string: "0lskdfj1lskdfj2...lskdfj24".
    long_path = "lskdfj".join(map(str, range(25)))

    # Only the tail of the path plus the POLARS_VERBOSE hint is expected.
    expected_tail = r"\.\.\.lskdfj14lskdfj15lskdfj16lskdfj17lskdfj18lskdfj19lskdfj20lskdfj21lskdfj22lskdfj23lskdfj24 \(set POLARS_VERBOSE=1 to see full path\)"

    with pytest.raises(FileNotFoundError, match=expected_tail):
        pl.read_csv(long_path)
81
82
83
def test_read_missing_file_path_expanded_when_polars_verbose_enabled(
    plmonkeypatch: PlMonkeyPatch,
) -> None:
    """Check that the full missing path is reported when POLARS_VERBOSE=1."""
    # Build a long path-like string: "0lskdfj1lskdfj2...lskdfj24".
    long_path = "lskdfj".join(map(str, range(25)))

    plmonkeypatch.setenv("POLARS_VERBOSE", "1")

    # The whole path, not just its tail, must appear in the message.
    with pytest.raises(FileNotFoundError, match=long_path):
        pl.read_csv(long_path)
95
96
97
def test_copy() -> None:
    """Check that shallow and deep copies of a DataFrame/Series equal the original."""
    duplicators = (copy.copy, copy.deepcopy)

    frame = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
    for duplicate in duplicators:
        assert_frame_equal(duplicate(frame), frame)

    series = pl.Series("a", [1, 2])
    for duplicate in duplicators:
        assert_series_equal(duplicate(series), series)
105
106
107
def test_categorical_round_trip() -> None:
    """Check that a Categorical column keeps its dtype through an Arrow round trip."""
    source = pl.DataFrame({"ints": [1, 2, 3], "cat": ["a", "b", "c"]}).with_columns(
        pl.col("cat").cast(pl.Categorical)
    )

    # The Arrow type of the categorical column should mention "dictionary".
    arrow_table = source.to_arrow()
    arrow_cat_type = str(arrow_table["cat"].type)
    assert "dictionary" in arrow_cat_type

    restored = cast("pl.DataFrame", pl.from_arrow(arrow_table))
    assert restored.dtypes == [pl.Int64, pl.Categorical]
116
117
118
def test_from_different_chunks() -> None:
    """Check that a frame built from an appended Series converts without panicking."""
    col_a = pl.Series("a", [1, 2, 3, 4, None])
    col_b = pl.Series("b", [1, 2])
    # NOTE(review): the append presumably leaves "b" with a different chunk
    # layout than "a" (per the test name) -- confirm against Series.append.
    col_b.append(pl.Series("b", [1, 2, 3]))

    # check we don't panic
    pl.DataFrame([col_a, col_b]).to_arrow()

    # A fresh frame is built for the pandas conversion, as in the Arrow case.
    pandas_frame = pl.DataFrame([col_a, col_b]).to_pandas()
    assert list(pandas_frame.columns) == ["a", "b"]
    assert pandas_frame.shape == (5, 2)
131
132
133
def test_unit_io_subdir_has_no_init() -> None:
    """Guard against an '__init__.py' being added to the 'tests/unit/io' directory."""
    # --------------------------------------------------------------------------------
    # If this test fails it means an '__init__.py' was added to 'tests/unit/io'.
    # See https://github.com/pola-rs/polars/pull/6889 for why this can cause issues.
    # --------------------------------------------------------------------------------
    # TLDR: it can mask the builtin 'io' module, causing a fatal python error.
    # --------------------------------------------------------------------------------
    this_dir = Path(__file__).parent
    # Sanity check: this file must live in '.../unit/io'.
    assert this_dir.parts[-2:] == ("unit", "io")

    init_file = this_dir / "__init__.py"
    assert not init_file.exists(), (
        "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory"
    )
145
146
147
@pytest.mark.write_disk
@pytest.mark.parametrize(
    ("scan_funcs", "write_func"),
    [
        ([pl.scan_parquet, pl.read_parquet], pl.DataFrame.write_parquet),
        ([pl.scan_csv, pl.read_csv], pl.DataFrame.write_csv),
    ],
)
@pytest.mark.parametrize("char", ["[", "*"])
def test_no_glob(
    scan_funcs: list[Callable[[Any], pl.LazyFrame | pl.DataFrame]],
    write_func: Callable[[pl.DataFrame, Path], None],
    char: str,
    tmp_path: Path,
) -> None:
    """Check that glob=False reads a file whose name is a glob character literally."""
    if char == "*" and sys.platform == "win32":
        pytest.skip("unsupported glob char for windows")

    tmp_path.mkdir(exist_ok=True)

    expected = pl.DataFrame({"x": 1})

    # Two files are written: one named exactly `char`, one named `char` + "1".
    targets = [tmp_path / char, tmp_path / f"{char}1"]
    for target in targets:
        write_func(expected, target)

    # Only the first file is read back, with globbing disabled.
    literal_path = targets[0]
    for func in scan_funcs:
        loaded = func(literal_path, glob=False).lazy().collect()  # type: ignore[call-arg]
        assert_frame_equal(loaded, expected)
176
177