Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/test_meta.py
6940 views
1
from __future__ import annotations
2
3
import re
4
from datetime import date, datetime, time, timedelta
5
from typing import TYPE_CHECKING, Any
6
7
import pytest
8
9
import polars as pl
10
import polars.selectors as cs
11
from polars.exceptions import ComputeError
12
from tests.unit.conftest import NUMERIC_DTYPES
13
14
if TYPE_CHECKING:
15
from pathlib import Path
16
17
18
def test_root_and_output_names() -> None:
    """Check `meta.output_name` and `meta.root_names` on several expression shapes."""
    # Each case pairs an expression with its expected root names; the expected
    # output name is "foo" for all of them.
    cases = [
        (pl.col("foo") * pl.col("bar"), ["foo", "bar"]),
        (pl.col("foo").filter(bar=13), ["foo", "bar"]),
        (pl.sum("foo").over("groups"), ["foo", "groups"]),
        (pl.sum("foo").slice(pl.len() - 10, pl.col("bar")), ["foo", "bar"]),
    ]
    for expr, expected_roots in cases:
        assert expr.meta.output_name() == "foo"
        assert expr.meta.root_names() == expected_roots

    assert pl.len().meta.output_name() == "len"

    # A suffixed `pl.all()` has no determinable output name: by default the
    # lookup raises, with `raise_if_undetermined=False` it yields None instead.
    expected_error = re.escape(
        "unable to find root column name for expr 'cs.all()' when calling 'output_name'"
    )
    with pytest.raises(ComputeError, match=expected_error):
        pl.all().name.suffix("_").meta.output_name()

    undetermined = pl.all().name.suffix("_").meta.output_name(
        raise_if_undetermined=False
    )
    assert undetermined is None
49
50
51
def test_undo_aliases() -> None:
    """Check that `meta.undo_aliases` removes renaming layers from an expression.

    Covers a single alias, `name.keep()`, stacked aliases, `name.suffix()`,
    and an expression that carries no alias at all.
    """
    # A single alias is stripped back to the plain column.
    e = pl.col("foo").alias("bar")
    assert e.meta.undo_aliases().meta == pl.col("foo")

    e = pl.col("foo").sum().over("bar")
    assert e.name.keep().meta.undo_aliases().meta == e

    # With no alias present, undoing aliases must leave the expression as-is.
    assert e.meta.undo_aliases().meta == e

    # Stacked aliases: expressions are not modified in place, so the aliased
    # expression has to be bound to a name; a bare `e.alias(...)` statement
    # would discard the result and leave this case untested.
    aliased = e.alias("bar").alias("foo")
    assert aliased.meta.undo_aliases().meta == e

    assert e.name.suffix("ham").meta.undo_aliases().meta == e
61
62
63
def test_meta_has_multiple_outputs() -> None:
    """A suffixed two-column selection must report multiple outputs."""
    two_columns = pl.col(["a", "b"])
    suffixed = two_columns.name.suffix("_foo")
    assert suffixed.meta.has_multiple_outputs()
66
67
68
def test_is_column() -> None:
    """`meta.is_column` holds for a bare column, not for an alias or a product."""
    plain = pl.col("foo")
    assert plain.meta.is_column()

    # Neither an aliased column nor a binary expression is a plain column.
    aliased = pl.col("foo").alias("bar")
    product = pl.col("foo") * pl.col("bar")
    for not_a_column in (aliased, product):
        assert not not_a_column.meta.is_column()
77
78
79
@pytest.mark.parametrize(
    ("expr", "is_column_selection"),
    [
        # columns
        (pl.col("foo"), True),
        (pl.col("foo", "bar"), True),
        (pl.col(NUMERIC_DTYPES), True),
        # column expressions
        (pl.col("foo") + 100, False),
        (pl.col("foo").floordiv(10), False),
        (pl.col("foo") * pl.col("bar"), False),
        # selectors / expressions
        (cs.numeric() * 100, False),
        (cs.temporal() - cs.time(), True),
        (cs.numeric().exclude("value"), True),
        ((cs.temporal() - cs.time()).exclude("dt"), True),
        # top-level selection funcs
        (pl.nth(2), True),
        (pl.first(), True),
        (pl.last(), True),
    ],
)
def test_is_column_selection(
    expr: pl.Expr,
    is_column_selection: bool,
) -> None:
    """Check `meta.is_column_selection`, with and without `allow_aliasing`."""
    if not is_column_selection:
        assert not expr.meta.is_column_selection()
        return

    assert expr.meta.is_column_selection()
    assert expr.meta.is_column_selection(allow_aliasing=True)

    # Rename the selection: a suffix when it has multiple outputs, a plain
    # alias otherwise.
    if expr.meta.has_multiple_outputs():
        renamed = expr.name.suffix("!")
    else:
        renamed = expr.alias("!")

    # Once renamed it only counts as a column selection if aliasing is allowed.
    assert not renamed.meta.is_column_selection()
    assert renamed.meta.is_column_selection(allow_aliasing=True)
117
118
119
@pytest.mark.parametrize(
    "value",
    [
        None,
        1234,
        567.89,
        float("inf"),
        date.today(),
        datetime.now(),
        time(10, 30, 45),
        timedelta(hours=-24),
        ["x", "y", "z"],
        pl.Series([None, None]),
        [[10, 20], [30, 40]],
        "this is the way",
    ],
)
def test_is_literal(value: Any) -> None:
    """Check `meta.is_literal` on a bare literal and on an aliased one."""
    bare = pl.lit(value)
    assert bare.meta.is_literal()

    # An aliased literal only counts as a literal when aliasing is allowed.
    aliased = pl.lit(value).alias("foo")
    assert not aliased.meta.is_literal()
    assert aliased.meta.is_literal(allow_aliasing=True)
145
146
147
def test_meta_is_regex_projection() -> None:
    """Check `meta.is_regex_projection` for patterns with and without a trailing '$'."""
    anchored = pl.col("^.*$").name.suffix("_foo")
    assert anchored.meta.is_regex_projection()
    assert anchored.meta.has_multiple_outputs()

    # no trailing '$': expected to behave as an ordinary single column
    unanchored = pl.col("^.*")
    assert not unanchored.meta.is_regex_projection()
    assert not unanchored.meta.has_multiple_outputs()
    assert unanchored.meta.is_column()
156
157
158
def test_meta_tree_format(namespace_files_path: Path) -> None:
    """Compare `meta.tree_format` output with the cases in `test_tree_fmt.txt`."""
    fixture_text = (namespace_files_path / "test_tree_fmt.txt").read_text(
        encoding="utf-8"
    )

    # Cases are separated by "---". Within a case the first line is the
    # expression source and the remaining lines are the expected tree.
    for case in fixture_text.split("---"):
        expression, *expected_lines = case.strip().split("\n")
        expected_tree = "\n".join(expected_lines)

        # NOTE: `eval` executes text read from the fixture file, so that file
        # must stay trusted.
        rendered = eval(expression).meta.tree_format(return_as_string=True)

        # Trailing whitespace on each rendered line is dropped before comparing.
        rendered = "\n".join(line.rstrip() for line in rendered.split("\n"))
        assert rendered.strip() == expected_tree.strip()
168
169
170
def test_meta_show_graph(namespace_files_path: Path) -> None:
    """Smoke-test `meta.show_graph` with `show=False` and `raw_output=True`."""
    summed_product = (pl.col("foo") * pl.col("bar")).sum()
    expr = summed_product.over(pl.col("ham")) / 2

    raw = expr.meta.show_graph(show=False, raw_output=True)

    # Don't check output contents since this creates a maintenance burden
    # Assume output check in test_meta_tree_format is enough
    assert raw is not None
    assert len(raw) > 0
177
178
179
def test_literal_output_name() -> None:
    """Check the output name reported for scalar and Series literals."""
    cases = [
        # scalar literal
        (pl.lit(1), "literal"),
        # named Series
        (pl.lit(pl.Series("abc", [1, 2, 3])), "abc"),
        # Series created without a name
        (pl.lit(pl.Series([1, 2, 3])), ""),
    ]
    for literal, expected_name in cases:
        assert literal.meta.output_name() == expected_name
188
189
190
def test_struct_field_output_name_24003() -> None:
    """A struct field expression should report the field name as its output name."""
    radius = pl.col("ball").struct.field("radius")
    assert radius.meta.output_name() == "radius"
192
193
194
def test_selector_by_name_single() -> None:
    """A `by_name` selector over one name should report that name as output."""
    single = cs.by_name("foo")
    assert single.meta.output_name() == "foo"
196
197
198
def test_selector_by_name_multiple() -> None:
    """Requesting the output name of a two-name `by_name` selector should raise."""
    names = ["foo", "bar"]
    # The selector is built inside the context so the whole call chain is
    # covered by the expected-exception check.
    with pytest.raises(ComputeError):
        cs.by_name(names).meta.output_name()
201
202