Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/test_business_day_count.py
6939 views
1
from __future__ import annotations
2
3
import datetime as dt
4
from datetime import date
5
6
import hypothesis.strategies as st
7
import numpy as np
8
import pytest
9
from hypothesis import assume, given, reject
10
11
import polars as pl
12
from polars._utils.various import parse_version
13
from polars.exceptions import ComputeError
14
from polars.testing import assert_series_equal
15
16
17
def test_business_day_count() -> None:
    """Check business-day counts for each expression/scalar argument pairing.

    No ``week_mask`` or ``holidays`` are passed here. Each pairing is evaluated
    against the same two-row frame and compared with an ``Int32`` series named
    ``business_day_count``.
    """
    name = "business_day_count"
    frame = pl.DataFrame(
        {
            "start": [date(2020, 1, 1), date(2020, 1, 2)],
            "end": [date(2020, 1, 2), date(2020, 1, 10)],
        }
    )
    null_date = pl.lit(None, dtype=pl.Date)

    def check(
        start: date | pl.Expr | str,
        end: date | pl.Expr | str,
        counts: list[int | None],
    ) -> None:
        # Evaluate one (start, end) pairing and compare it with the expected column.
        actual = frame.select(pl.business_day_count(start, end).alias(name))[name]
        assert_series_equal(actual, pl.Series(name, counts, pl.Int32))

    # (Expression, expression)
    check("start", "end", [1, 6])

    # (Expression, scalar)
    check("start", date(2020, 1, 10), [7, 6])
    check("start", null_date, [None, None])

    # (Scalar, expression)
    check(date(2020, 1, 1), "end", [1, 7])
    # see GH issue #23663
    inferred = (
        frame.lazy()
        .select(pl.business_day_count(date(2020, 1, 1), "end"))
        .collect_schema()
    )
    assert inferred == pl.Schema({"literal": pl.Int32})
    check(null_date, "end", [None, None])

    # (Scalar, scalar)
    check(date(2020, 1, 1), date(2020, 1, 10), [7])
67
68
69
def test_business_day_count_w_week_mask() -> None:
    """Check business-day counts when a custom ``week_mask`` is supplied."""
    frame = pl.DataFrame(
        {
            "start": [date(2020, 1, 1), date(2020, 1, 2)],
            "end": [date(2020, 1, 2), date(2020, 1, 10)],
        }
    )
    # Each case pairs a seven-entry week mask with the counts expected per row.
    cases = [
        ((True, True, True, True, True, True, False), [1, 7]),
        ((True, True, True, False, False, False, True), [1, 4]),
    ]
    for mask, counts in cases:
        counted = pl.business_day_count("start", "end", week_mask=mask)
        actual = frame.select(business_day_count=counted)["business_day_count"]
        wanted = pl.Series("business_day_count", counts, pl.Int32)
        assert_series_equal(actual, wanted)
91
92
93
def test_business_day_count_w_week_mask_invalid() -> None:
    """Check the errors raised for a wrong-length and an all-False ``week_mask``."""
    # A mask with two entries is rejected when the expression is built.
    too_short = (False, 0)
    wrong_length = r"expected a sequence of length 7 \(got 2\)"
    with pytest.raises(ValueError, match=wrong_length):
        pl.business_day_count("start", "end", week_mask=too_short)  # type: ignore[arg-type]

    # A seven-entry mask with no True entry has the right length but no business day.
    frame = pl.DataFrame(
        {
            "start": [date(2020, 1, 1), date(2020, 1, 2)],
            "end": [date(2020, 1, 2), date(2020, 1, 10)],
        }
    )
    no_business_days = [False] * 7
    empty_mask = "`week_mask` must have at least one business day"
    with pytest.raises(ComputeError, match=empty_mask):
        frame.select(
            pl.business_day_count("start", "end", week_mask=no_business_days)
        )
106
107
108
def test_business_day_count_schema() -> None:
    """Check the lazy schema, the collected schema, and the query plan text."""
    column = "business_day_count"
    source = pl.LazyFrame(
        {
            "start": [date(2020, 1, 1), date(2020, 1, 2)],
            "end": [date(2020, 1, 2), date(2020, 1, 10)],
        }
    )
    plan = source.select(business_day_count=pl.business_day_count("start", "end"))

    # The dtype must be Int32 both before and after collecting.
    assert plan.collect_schema()[column] == pl.Int32
    collected = plan.collect()
    assert collected.schema[column] == pl.Int32

    # The plan text should show the call as a method on the start column.
    plan_text = plan.explain()
    assert 'col("start").business_day_count([col("end")])' in plan_text
121
122
123
def test_business_day_count_w_holidays() -> None:
    """Check business-day counts when a ``holidays`` list is supplied."""
    closed = [date(2020, 1, 1), date(2020, 1, 9)]
    frame = pl.DataFrame(
        {
            "start": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 2)],
            "end": [date(2020, 1, 2), date(2020, 1, 10), date(2020, 1, 9)],
        }
    )
    counted = pl.business_day_count("start", "end", holidays=closed)
    actual = frame.select(business_day_count=counted)["business_day_count"]
    wanted = pl.Series("business_day_count", [0, 5, 5], pl.Int32)
    assert_series_equal(actual, wanted)
137
138
139
@given(
    start=st.dates(min_value=dt.date(1969, 1, 1), max_value=dt.date(1970, 12, 31)),
    end=st.dates(min_value=dt.date(1969, 1, 1), max_value=dt.date(1970, 12, 31)),
    week_mask=st.lists(
        st.sampled_from([True, False]),
        min_size=7,
        max_size=7,
    ),
    holidays=st.lists(
        st.dates(min_value=dt.date(1969, 1, 1), max_value=dt.date(1970, 12, 31)),
        min_size=0,
        max_size=100,
    ),
)
def test_against_np_busday_count(
    start: dt.date, end: dt.date, week_mask: tuple[bool, ...], holidays: list[dt.date]
) -> None:
    """Compare ``pl.business_day_count`` with ``np.busday_count`` on generated input.

    Examples whose week mask has no ``True`` entry are discarded up front.
    Examples with ``start > end`` are rejected on numpy older than 1.25.
    """
    assume(any(week_mask))

    frame = pl.DataFrame({"start": [start], "end": [end]})
    counted = pl.business_day_count(
        "start", "end", week_mask=week_mask, holidays=holidays
    )
    result = frame.select(n=counted)["n"].item()

    expected = np.busday_count(start, end, weekmask=week_mask, holidays=holidays)

    if start > end:
        if parse_version(np.__version__) < (1, 25):
            # Bug in old versions of numpy
            reject()
    assert result == expected
171
172
173
def test_unequal_length_22018() -> None:
    """Check that start/end series of different lengths raise ``ShapeError``."""
    same_day = date(2020, 1, 1)
    starts = pl.Series([same_day] * 2)
    ends = pl.Series([same_day] * 3)
    with pytest.raises(pl.exceptions.ShapeError):
        pl.select(pl.business_day_count(starts, ends))
181
182