Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/interchange/test_dataframe.py
6939 views
1
from __future__ import annotations
2
3
import pytest
4
5
import polars as pl
6
from polars.interchange.dataframe import PolarsDataFrame
7
from polars.interchange.protocol import CopyNotAllowedError
8
from polars.testing import assert_frame_equal, assert_series_equal
9
10
11
def test_dataframe_dunder() -> None:
    """Check `__dataframe__` on the wrapper keeps the frame and applies `allow_copy`."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    wrapper = PolarsDataFrame(frame)

    # A freshly built wrapper holds the frame and allows copies.
    assert_frame_equal(wrapper._df, frame)
    assert wrapper._allow_copy is True

    # Re-requesting the interchange object with allow_copy=False keeps the
    # same data but flips the flag.
    strict_wrapper = wrapper.__dataframe__(allow_copy=False)

    assert_frame_equal(strict_wrapper._df, frame)
    assert strict_wrapper._allow_copy is False
22
23
24
def test_dataframe_dunder_nan_as_null_not_implemented() -> None:
    """Check `nan_as_null=True` is rejected by both the frame and its wrapper."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    wrapper = PolarsDataFrame(frame)

    # Same expectation for the polars frame first, then the interchange wrapper.
    for source in (frame, wrapper):
        with pytest.raises(NotImplementedError, match="has not been implemented"):
            source.__dataframe__(nan_as_null=True)
33
34
35
def test_metadata() -> None:
    """Check the wrapper reports an empty metadata mapping."""
    wrapper = PolarsDataFrame(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
    assert wrapper.metadata == {}
39
40
41
def test_num_columns() -> None:
    """Check `num_columns` counts the columns of the wrapped frame."""
    wrapper = PolarsDataFrame(pl.DataFrame({"a": [1], "b": [2]}))
    assert wrapper.num_columns() == 2
45
46
47
def test_num_rows() -> None:
    """Check `num_rows` counts the rows of the wrapped frame."""
    wrapper = PolarsDataFrame(pl.DataFrame({"a": [1, 2], "b": [3, 4]}))
    assert wrapper.num_rows() == 2
51
52
53
def test_num_chunks() -> None:
    """Check `num_chunks` is 1 for a plain frame and 2 after an un-rechunked concat."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    single = PolarsDataFrame(frame)
    assert single.num_chunks() == 1

    # rechunk=False keeps the two inputs as separate chunks.
    stacked = pl.concat([frame, frame], rechunk=False)
    double = stacked.__dataframe__()
    assert double.num_chunks() == 2
61
62
63
def test_column_names() -> None:
    """Check `column_names` lists the frame's column names in order."""
    wrapper = PolarsDataFrame(pl.DataFrame({"a": [1], "b": [2]}))
    assert wrapper.column_names() == ["a", "b"]
67
68
69
def test_get_column() -> None:
    """Check `get_column` fetches a column by positional index."""
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    wrapper = PolarsDataFrame(frame)

    # Index 1 is the second column, "b".
    column = wrapper.get_column(1)

    assert_series_equal(column._col, pl.Series("b", [3, 4]))
77
78
79
def test_get_column_by_name() -> None:
    """Check `get_column_by_name` fetches a column by its name."""
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    wrapper = PolarsDataFrame(frame)

    column = wrapper.get_column_by_name("b")

    assert_series_equal(column._col, pl.Series("b", [3, 4]))
87
88
89
def test_get_columns() -> None:
    """Check `get_columns` yields one wrapped column per series, in order."""
    s1 = pl.Series("a", [1, 2])
    s2 = pl.Series("b", [3, 4])
    df = pl.DataFrame([s1, s2])
    dfi = PolarsDataFrame(df)

    # Materialise the result so its length can be checked.
    out = list(dfi.get_columns())

    expected = [s1, s2]
    # zip() stops at the shorter input, so without this check the loop below
    # would pass vacuously if fewer (or no) columns were returned.
    assert len(out) == len(expected)
    for o, e in zip(out, expected):
        assert_series_equal(o._col, e)
100
101
102
def test_select_columns() -> None:
    """Check `select_columns` keeps only the columns at the given indices."""
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    wrapper = PolarsDataFrame(frame)

    # Indices 0 and 2 correspond to columns "a" and "c".
    selection = wrapper.select_columns([0, 2])

    assert_frame_equal(selection._df, pl.DataFrame({"a": [1, 2], "c": [5, 6]}))
110
111
112
def test_select_columns_nonlist_input() -> None:
    """Check `select_columns` accepts a tuple of indices, not only a list."""
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    wrapper = PolarsDataFrame(frame)

    selection = wrapper.select_columns((2,))

    assert_frame_equal(selection._df, pl.DataFrame({"c": [5, 6]}))
120
121
122
def test_select_columns_invalid_input() -> None:
    """Check `select_columns` raises `TypeError` for a bare integer argument."""
    wrapper = PolarsDataFrame(pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}))

    with pytest.raises(TypeError):
        wrapper.select_columns(1)  # type: ignore[arg-type]
128
129
130
def test_select_columns_by_name() -> None:
    """Check `select_columns_by_name` keeps only the named columns."""
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    wrapper = PolarsDataFrame(frame)

    selection = wrapper.select_columns_by_name(["a", "c"])

    assert_frame_equal(selection._df, pl.DataFrame({"a": [1, 2], "c": [5, 6]}))
138
139
140
def test_select_columns_by_name_invalid_input() -> None:
    """Check `select_columns_by_name` raises `TypeError` for a bare integer argument."""
    wrapper = PolarsDataFrame(pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}))

    with pytest.raises(TypeError):
        wrapper.select_columns_by_name(1)  # type: ignore[arg-type]
146
147
148
@pytest.mark.parametrize("n_chunks", [None, 2])
def test_get_chunks(n_chunks: int | None) -> None:
    """Check `get_chunks` matches `_get_chunks_from_col_chunks` chunk for chunk.

    Runs with `n_chunks=None` and `n_chunks=2` on a frame concatenated from
    two frames with `rechunk=False`.
    """
    df1 = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
    df2 = pl.DataFrame({"a": [3], "b": [6]})
    df = pl.concat([df1, df2], rechunk=False)
    dfi = PolarsDataFrame(df)

    # Materialise both iterators so their lengths can be compared.
    out = list(dfi.get_chunks(n_chunks))
    expected = list(dfi._get_chunks_from_col_chunks())

    # zip() stops at the shorter input, so without this check the loop below
    # would pass vacuously if `get_chunks` yielded too few (or no) chunks.
    assert len(out) == len(expected)
    for o, e in zip(out, expected):
        assert_frame_equal(o._df, e)
160
161
162
def test_get_chunks_invalid_input() -> None:
    """Check `get_chunks` raises `ValueError` for 0 and 3 chunks on a two-part frame."""
    head = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
    tail = pl.DataFrame({"a": [3], "b": [6]})
    wrapper = PolarsDataFrame(pl.concat([head, tail], rechunk=False))

    # The error only surfaces once the returned iterator is advanced.
    for bad_n_chunks in (0, 3):
        with pytest.raises(ValueError):
            next(wrapper.get_chunks(bad_n_chunks))
174
175
176
def test_get_chunks_subdivided_chunks() -> None:
    """Check `get_chunks(4)` splits a 3-row + 2-row frame into the expected pieces."""
    head = pl.DataFrame({"a": [1, 2, 3], "b": [6, 7, 8]})
    tail = pl.DataFrame({"a": [4, 5], "b": [9, 0]})
    combined = pl.concat([head, tail], rechunk=False)

    wrapper = PolarsDataFrame(combined)
    chunks = wrapper.get_chunks(4)

    # Expected pieces, in the order the iterator should yield them.
    expected_pieces = [
        pl.DataFrame({"a": [1, 2], "b": [6, 7]}),
        pl.DataFrame({"a": [3], "b": [8]}),
        pl.DataFrame({"a": [4], "b": [9]}),
        pl.DataFrame({"a": [5], "b": [0]}),
    ]
    for expected in expected_pieces:
        piece = next(chunks)
        assert_frame_equal(piece._df, expected)

    # Nothing may follow the fourth piece.
    with pytest.raises(StopIteration):
        next(chunks)
202
203
204
def test_get_chunks_zero_copy_fail() -> None:
    """Check `get_chunks` raises `CopyNotAllowedError` with `allow_copy=False`.

    The frame pairs a plain column with one concatenated using `rechunk=False`.
    """
    plain = pl.Series([1, 2])
    split = pl.concat([pl.Series([3]), pl.Series([4])], rechunk=False)
    frame = pl.DataFrame({"a": plain, "b": split})

    wrapper = PolarsDataFrame(frame, allow_copy=False)

    expected_message = "unevenly chunked columns must be rechunked"
    with pytest.raises(CopyNotAllowedError, match=expected_message):
        next(wrapper.get_chunks())
215
216
217
@pytest.mark.parametrize("allow_copy", [True, False])
def test_get_chunks_from_col_chunks_single_chunk(allow_copy: bool) -> None:
    """Check a plain frame comes back as exactly one chunk equal to itself."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    wrapper = PolarsDataFrame(frame, allow_copy=allow_copy)
    chunks = wrapper._get_chunks_from_col_chunks()

    only_chunk = next(chunks)
    assert_frame_equal(only_chunk, frame)

    # The iterator must be exhausted after the first chunk.
    with pytest.raises(StopIteration):
        next(chunks)
229
230
231
@pytest.mark.parametrize("allow_copy", [True, False])
def test_get_chunks_from_col_chunks_even_chunks(allow_copy: bool) -> None:
    """Check an un-rechunked concat of two frames yields those two frames back."""
    head = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
    tail = pl.DataFrame({"a": [3], "b": [6]})
    combined = pl.concat([head, tail], rechunk=False)

    wrapper = PolarsDataFrame(combined, allow_copy=allow_copy)
    chunks = wrapper._get_chunks_from_col_chunks()

    # Chunks are expected in concatenation order.
    for expected in (head, tail):
        assert_frame_equal(next(chunks), expected)

    with pytest.raises(StopIteration):
        next(chunks)
248
249
250
def test_get_chunks_from_col_chunks_uneven_chunks_allow_copy() -> None:
    """Check the chunks yielded for differently chunked columns when copies are allowed."""
    two_part = pl.concat([pl.Series([1, 2]), pl.Series([3, 4, 5])], rechunk=False)
    three_part = pl.concat(
        [pl.Series([6, 7]), pl.Series([8]), pl.Series([9, 0])], rechunk=False
    )
    frame = pl.DataFrame({"a": two_part, "b": three_part})

    wrapper = PolarsDataFrame(frame, allow_copy=True)
    chunks = wrapper._get_chunks_from_col_chunks()

    # The expected row groups (2 rows, then 3) follow column "a"'s pieces.
    expected_chunks = [
        pl.DataFrame({"a": [1, 2], "b": [6, 7]}),
        pl.DataFrame({"a": [3, 4, 5], "b": [8, 9, 0]}),
    ]
    for expected in expected_chunks:
        assert_frame_equal(next(chunks), expected)

    with pytest.raises(StopIteration):
        next(chunks)
270
271
272
def test_get_chunks_from_col_chunks_uneven_chunks_zero_copy_fails() -> None:
    """Check the second chunk of differently chunked columns fails with `allow_copy=False`."""
    two_part = pl.concat([pl.Series([1, 2]), pl.Series([3, 4, 5])], rechunk=False)
    three_part = pl.concat(
        [pl.Series([6, 7]), pl.Series([8]), pl.Series([9, 0])], rechunk=False
    )
    frame = pl.DataFrame({"a": two_part, "b": three_part})

    wrapper = PolarsDataFrame(frame, allow_copy=False)
    chunks = wrapper._get_chunks_from_col_chunks()

    # First chunk can be yielded zero copy
    first_chunk = next(chunks)
    assert_frame_equal(first_chunk, pl.DataFrame({"a": [1, 2], "b": [6, 7]}))

    # Second chunk requires a rechunk of the second column
    with pytest.raises(CopyNotAllowedError, match="columns must be rechunked"):
        next(chunks)
290
291
292
def test_dataframe_unsupported_types() -> None:
    """Check a list-typed column only fails once its `dtype` is requested."""
    frame = pl.DataFrame({"a": [[4], [5, 6]]})
    wrapper = PolarsDataFrame(frame)

    # Generic dataframe operations work fine
    assert wrapper.num_rows() == 2

    # Certain column operations also work
    column = wrapper.get_column_by_name("a")
    assert column.num_chunks() == 1

    # Error is raised when unsupported operations are requested
    with pytest.raises(ValueError, match="not supported"):
        _ = column.dtype  # the attribute access itself is what raises
306
307