Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
RWTH-EBC
GitHub Repository: RWTH-EBC/ebcpy
Path: blob/master/tests/test_preprocessing.py
505 views
1
"""Test-module for all classes inside
2
ebcpy.preprocessing."""
3
import unittest
4
import os
5
from pathlib import Path
6
from datetime import datetime
7
import numpy as np
8
import pandas as pd
9
from ebcpy import preprocessing, TimeSeriesData
10
11
12
class TestPreProcessing(unittest.TestCase):
    """Test-class for preprocessing.

    Each test exercises one function of ``ebcpy.preprocessing``. Several
    tests draw their input sizes from ``np.random`` without a fixed seed,
    so the concrete inputs differ between runs.
    """

    def setUp(self):
        """Called before every test.

        Used to setup relevant paths and APIs etc."""
        self.example_dir = Path(__file__).parent.joinpath("data")
        self.example_data_hdf_path = os.path.normpath(
            os.path.join(self.example_dir, "example_data.hdf")
        )

    def test_build_average_on_duplicate_rows(self):
        """Test function of preprocessing.build_average_on_duplicate_rows().

        For an example, see the doctest in the function."""
        # Choose random number to check if function works in every dimension
        dim = np.random.randint(1, 1000)
        vals = np.random.rand(dim)
        # instantiate df with index all 1
        df = pd.DataFrame({"idx": np.ones(dim), "val": vals}).set_index("idx")
        df = preprocessing.build_average_on_duplicate_rows(df)
        # Check if the length has been reduced to 1
        self.assertEqual(len(df), 1)
        # Check if the average is computed correctly. Two float means computed
        # by different routines may differ in the last bits, hence no exact
        # equality here.
        self.assertAlmostEqual(df.iloc[0].val, np.average(vals))

    def test_convert_index_to_datetime_index(self):
        """Test function of preprocessing.convert_index_to_datetime_index().

        For an example, see the doctest in the function."""
        dim = np.random.randint(1, 10000)
        df = pd.DataFrame(np.random.rand(dim, 4), columns=list('ABCD'))
        df_temp = preprocessing.convert_index_to_datetime_index(df)
        # Check if index is correctly created
        self.assertIsInstance(df_temp.index, pd.DatetimeIndex)
        # Check different unit-formats:
        for unit in ["ms", "s", "h", "d", "min"]:
            df_temp = preprocessing.convert_index_to_datetime_index(
                df, unit_of_index=unit
            )
            self.assertIsInstance(df_temp.index, pd.DatetimeIndex)
        # Test different datetime:
        df_temp = preprocessing.convert_index_to_datetime_index(
            df, origin=datetime(2007, 1, 1)
        )
        self.assertIsInstance(df_temp.index, pd.DatetimeIndex)
        # Test wrong unit-input:
        with self.assertRaises(ValueError):
            preprocessing.convert_index_to_datetime_index(
                df, unit_of_index="not_a_unit"
            )

    def test_convert_datetime_index_to_float_index(self):
        """Test function of preprocessing.convert_datetime_index_to_float_index().

        For an example, see the doctest in the function."""
        dim = np.random.randint(1, 10000)
        df = pd.DataFrame(np.random.rand(dim, 4), columns=list('ABCD'))
        df_temp = preprocessing.convert_index_to_datetime_index(df.copy())
        df_temp = preprocessing.convert_datetime_index_to_float_index(
            df_temp.copy(), offset=0.0
        )
        self.assertIsInstance(df_temp.index, type(pd.Index([], dtype="float64")))
        # The builtin all() on a DataFrame iterates its column labels, which
        # are always truthy here. Reduce over the values instead so that the
        # round trip is really verified.
        self.assertTrue(((df_temp - df) == 0).all().all())

    def test_time_based_weighted_mean(self):
        """Test function of preprocessing.time_based_weighted_mean().

        For an example, see the doctest in the function."""
        time_vec = [datetime(2007, 1, 1, 0, 0),
                    datetime(2007, 1, 1, 0, 0),
                    datetime(2007, 1, 1, 0, 5),
                    datetime(2007, 1, 1, 0, 7),
                    datetime(2007, 1, 1, 0, 10)]
        df = pd.DataFrame({'A': [1, 2, 4, 3, 6],
                           'B': [11, 12, 14, 13, 16]}, index=time_vec)
        res = preprocessing.time_based_weighted_mean(df=df)
        # Check correct return type
        self.assertIsInstance(res, np.ndarray)
        # Check correct values element-wise, so that deviations of opposite
        # sign in the two columns cannot cancel each other out.
        np.testing.assert_allclose(res, np.array([3.55, 13.55]))

    def test_clean_and_space_equally_time_series(self):
        """Test function of preprocessing.clean_and_space_equally_time_series().

        For an example, see the doctest in the function."""
        np.random.seed(1)
        dim = 100
        df = pd.DataFrame(np.random.randint(0, 100, size=(dim, 4)),
                          index=np.arange(0, dim), columns=list('ABCD'))
        # Check if wrong index input raises error:
        with self.assertRaises(TypeError):
            preprocessing.clean_and_space_equally_time_series(df, "1s")
        with self.assertRaises(TypeError):
            preprocessing.clean_and_space_equally_time_series(TimeSeriesData(df), "1s")
        df = preprocessing.convert_index_to_datetime_index(df)
        df_temp = preprocessing.clean_and_space_equally_time_series(df, "1s")
        self.assertIsInstance(df_temp, pd.DataFrame)
        # Trigger upsampling warning
        with self.assertWarns(UserWarning):
            preprocessing.clean_and_space_equally_time_series(df, "10s")
        # Test non-numeric input
        df.iloc[0, 0] = "not_a_number"
        with self.assertRaises(ValueError):
            preprocessing.clean_and_space_equally_time_series(df, "1s")
        # Trigger NaN-input print statement
        # (np.nan instead of np.NaN: the alias was removed in NumPy 2.0)
        df.iloc[0, 0] = np.nan
        preprocessing.clean_and_space_equally_time_series(df, "1s")
        # Generate data with floating index and small frequency
        df = pd.DataFrame(np.random.randint(0, 100, size=(dim, 4)),
                          index=np.arange(0, dim * 0.01, 0.01), columns=list('ABCD'))
        df = preprocessing.convert_index_to_datetime_index(df)
        df_temp = preprocessing.clean_and_space_equally_time_series(df, "10ms")
        new_freq, new_freq_std = preprocessing.get_df_index_frequency_mean_and_std(
            df_temp.index
        )
        self.assertAlmostEqual(new_freq, 0.01, 6)
        self.assertAlmostEqual(new_freq_std, 0.0, 6)
        # Generate data with no frequency
        index = [i * 10 if i <= 50 else (i - 50) * 15 + 500 for i in range(100)]
        df = pd.DataFrame(np.random.randint(0, 100, size=(dim, 4)),
                          index=index, columns=list('ABCD'))
        df = preprocessing.convert_index_to_datetime_index(df, "min")
        # Trigger warning when desired frequency is outside the confidence
        # interval of a no frequency index
        with self.assertWarns(UserWarning):
            preprocessing.clean_and_space_equally_time_series(df, "10min")

    def test_low_pass_filter(self):
        """Test function of preprocessing.low_pass_filter().

        For an example, see the doctest in the function."""
        # Randomly generate all inputs to assure that different
        # inputs will always work.
        dim = np.random.randint(1, 10000)
        vals = np.random.rand(dim)
        freq = np.random.randint(1, 100) / 100
        order = np.random.randint(1, 5)
        output = preprocessing.low_pass_filter(vals, freq, order)
        self.assertIsInstance(output, np.ndarray)

    def test_moving_average(self):
        """Test function of preprocessing.moving_average().

        For an example, see the doctest in the function."""
        series = np.sin(np.linspace(-30, 30, 1000))
        window = np.random.randint(1, len(series))
        output = preprocessing.moving_average(series, window)
        self.assertIsInstance(output, np.ndarray)

    def test_create_on_off_signal(self):
        """Test function of preprocessing.create_on_off_signal().

        For an example, see the doctest in the function."""
        df = pd.DataFrame()
        with self.assertRaises(IndexError):
            # Give too many new names
            preprocessing.create_on_off_signal(df, col_names=["Dummy"], threshold=None,
                                               col_names_new=["One", "too much"])
        with self.assertRaises(IndexError):
            # Too many thresholds given
            preprocessing.create_on_off_signal(df, col_names=["Dummy"],
                                               threshold=[1, 2, 3, 4],
                                               col_names_new=["Dummy_signal"])
        time_df = pd.DataFrame({"dummy_P_el": np.sin(np.linspace(-20, 20, 100)) * 100})
        df = preprocessing.create_on_off_signal(time_df,
                                                col_names=["dummy_P_el"],
                                                threshold=25,
                                                col_names_new=["dummy_signal"])
        self.assertIsInstance(df["dummy_signal"], pd.Series)
        self.assertIsInstance(df, pd.DataFrame)

    def test_number_lines_totally_na(self):
        """Test function of preprocessing.number_lines_totally_na().

        For an example, see the doctest in the function."""
        dim = np.random.randint(100)
        # np.nan instead of np.NaN: the alias was removed in NumPy 2.0
        nan_col = [np.nan] * dim
        col = np.arange(dim)
        df_nan = pd.DataFrame({"col_1": nan_col, "col_2": nan_col})
        df_normal = pd.DataFrame({"col_1": nan_col, "col_2": col})
        self.assertEqual(preprocessing.number_lines_totally_na(df_nan), dim)
        self.assertEqual(preprocessing.number_lines_totally_na(df_normal), 0)
        # Test wrong input
        with self.assertRaises(TypeError):
            preprocessing.number_lines_totally_na("not_a_df")

    def test_z_score(self):
        """Test function of preprocessing.z_score().

        For an example, see the doctest in the function."""
        normal_dis = np.random.normal(0, 1, 1000)
        res = preprocessing.z_score(normal_dis, limit=2)
        self.assertIsInstance(res, np.ndarray)

    def test_modified_z_score(self):
        """Test function of preprocessing.modified_z_score().

        For an example, see the doctest in the function."""
        normal_dis = np.random.normal(0, 1, 1000)
        res = preprocessing.modified_z_score(normal_dis, limit=2)
        self.assertIsInstance(res, np.ndarray)

    def test_interquartile_range(self):
        """Test function of preprocessing.interquartile_range().

        For an example, see the doctest in the function."""
        normal_dis = np.random.normal(0, 1, 1000)
        res = preprocessing.interquartile_range(normal_dis)
        self.assertIsInstance(res, np.ndarray)

    def test_cross_validation(self):
        """Test function of preprocessing.cross_validation().

        For an example, see the doctest in the function.
        """
        dim = np.random.randint(100, 1000)
        test_size = np.random.uniform(low=0.1, high=0.9, size=(1,))[0]
        x = np.random.rand(dim)
        y = np.random.rand(dim)
        ret = preprocessing.cross_validation(x, y, test_size=test_size)
        self.assertEqual(len(ret), 4)
        # Compare sizes of test and train-sets
        self.assertEqual(len(ret[0]), len(ret[2]))
        self.assertEqual(len(ret[1]), len(ret[3]))
213
214
215
if __name__ == "__main__":
    # Run every test of this module when the file is executed as a script;
    # importing the module must not start a test run.
    unittest.main()
217
218