CoCalc -- test_preprocessing.py

GitHub Repository: RWTH-EBC/ebcpy
Path: blob/master/tests/test_preprocessing.py
505 views
1
"""Test-module for all classes inside
2
ebcpy.preprocessing."""
3
import unittest
4
import os
5
from pathlib import Path
6
from datetime import datetime
7
import numpy as np
8
import pandas as pd
9
from ebcpy import preprocessing, TimeSeriesData
10

11

12
class TestPreProcessing(unittest.TestCase):
13
    """Test-class for preprocessing."""
14

15
    def setUp(self):
16
        """Called before every test.
17
        Used to setup relevant paths and APIs etc."""
18
        self.example_dir = Path(__file__).parent.joinpath("data")
19
        self.example_data_hdf_path = os.path.normpath(os.path.join(self.example_dir,
20
                                                                   "example_data.hdf"))
21

22
    def test_build_average_on_duplicate_rows(self):
        """Test function of preprocessing.build_average_on_duplicate_rows().

        Builds a frame whose rows all share the index value 1 and checks that
        they are merged into one row that holds the average of the values.
        For an example, see the doctest in the function."""
        # Choose random number to check if function works in every dimension
        dim = np.random.randint(1, 1000)
        vals = np.random.rand(dim)
        # instantiate df with index all 1
        df = pd.DataFrame({"idx": np.ones(dim), "val": vals}).set_index("idx")
        df = preprocessing.build_average_on_duplicate_rows(df)
        # Check if the length has been reduced to 1
        self.assertEqual(len(df), 1)
        # Check if the average is computed correctly. Two independently
        # computed floating-point means need not be bit-identical (the
        # summation order may differ), so compare with a tolerance.
        self.assertAlmostEqual(df.iloc[0].val, np.average(vals))
35

36
    def test_convert_index_to_datetime_index(self):
        """Test function of preprocessing.convert_index_to_datetime_index().

        Converts a randomly sized frame with the default settings, with every
        supported unit and with a custom origin, and checks each result
        instead of only checking that no exception is raised. An unknown
        unit has to raise a ValueError.
        For an example, see the doctest in the function."""

        dim = np.random.randint(1, 10000)
        df = pd.DataFrame(np.random.rand(dim, 4), columns=list('ABCD'))
        df_temp = preprocessing.convert_index_to_datetime_index(df)
        # Check if index is correctly created
        self.assertIsInstance(df_temp.index, pd.DatetimeIndex)
        # Check different unit-formats:
        for unit in ["ms", "s", "h", "d", "min"]:
            # subTest reports which unit failed without stopping the loop
            with self.subTest(unit=unit):
                df_temp = preprocessing.convert_index_to_datetime_index(
                    df, unit_of_index=unit)
                self.assertIsInstance(df_temp.index, pd.DatetimeIndex)
                # Converting the index must not add or drop rows
                self.assertEqual(len(df_temp), len(df))
        # Test different datetime:
        df_temp = preprocessing.convert_index_to_datetime_index(df, origin=datetime(2007, 1, 1))
        self.assertIsInstance(df_temp.index, pd.DatetimeIndex)
        self.assertEqual(len(df_temp), len(df))
        # Test wrong unit-input:
        with self.assertRaises(ValueError):
            preprocessing.convert_index_to_datetime_index(df,
                                                          unit_of_index="not_a_unit")
55

56
    def test_convert_datetime_index_to_float_index(self):
        """Test function of preprocessing.convert_datetime_index_to_float_index().

        Converts the index of a randomly sized frame to datetime and back to
        float (with zero offset) and checks that both the index and the data
        survive the round trip.
        For an example, see the doctest in the function."""
        dim = np.random.randint(1, 10000)
        df = pd.DataFrame(np.random.rand(dim, 4), columns=list('ABCD'))
        df_temp = preprocessing.convert_index_to_datetime_index(df.copy())
        df_temp = preprocessing.convert_datetime_index_to_float_index(df_temp.copy(), offset=0.0)
        self.assertIsInstance(df_temp.index, type(pd.Index([], dtype="float64")))
        # NOTE: the builtin all() applied to a DataFrame iterates over the
        # column labels ('A'..'D'), which are always truthy, so such a check
        # can never fail. Compare the index and the values explicitly.
        np.testing.assert_allclose(df_temp.index.to_numpy(dtype=float),
                                   df.index.to_numpy(dtype=float))
        np.testing.assert_array_equal(df_temp.to_numpy(), df.to_numpy())
65

66
    def test_time_based_weighted_mean(self):
        """Test function of preprocessing.time_based_weighted_mean().

        Uses a small frame with an irregular (and partly duplicated) datetime
        index and compares the result element-wise with the expected means.
        For an example, see the doctest in the function."""
        time_vec = [datetime(2007, 1, 1, 0, 0),
                    datetime(2007, 1, 1, 0, 0),
                    datetime(2007, 1, 1, 0, 5),
                    datetime(2007, 1, 1, 0, 7),
                    datetime(2007, 1, 1, 0, 10)]
        df = pd.DataFrame({'A': [1, 2, 4, 3, 6],
                           'B': [11, 12, 14, 13, 16]}, index=time_vec)
        res = preprocessing.time_based_weighted_mean(df=df)
        # Check correct return type
        self.assertIsInstance(res, np.ndarray)
        # Check correct values. Compare every element on its own: averaging
        # the differences would let errors of opposite sign cancel out, and
        # exact equality is too strict for floating-point results.
        np.testing.assert_allclose(res, np.array([3.55, 13.55]))
81

82
    def test_clean_and_space_equally_time_series(self):
        """Test function of preprocessing.clean_and_space_equally_time_series().

        Covers wrong index types, a plain resampling run, the upsampling
        warning, non-numeric and NaN input, a float index with a small step,
        and an index without a constant frequency.
        For an example, see the doctest in the function."""
        # Fixed seed so the generated integer data is reproducible
        np.random.seed(1)
        dim = 100
        df = pd.DataFrame(np.random.randint(0, 100, size=(dim, 4)),
                          index=np.arange(0, dim), columns=list('ABCD'))
        # Check if wrong index input raises error:
        with self.assertRaises(TypeError):
            preprocessing.clean_and_space_equally_time_series(df, "1s")
        with self.assertRaises(TypeError):
            preprocessing.clean_and_space_equally_time_series(TimeSeriesData(df), "1s")
        df = preprocessing.convert_index_to_datetime_index(df)
        df_temp = preprocessing.clean_and_space_equally_time_series(df, "1s")
        self.assertIsInstance(df_temp, pd.DataFrame)
        # Trigger upsampling warning
        with self.assertWarns(UserWarning):
            preprocessing.clean_and_space_equally_time_series(df, "10s")
        # Test non-numeric input
        df.iloc[0, 0] = "not_a_number"
        with self.assertRaises(ValueError):
            preprocessing.clean_and_space_equally_time_series(df, "1s")
        # Trigger NaN-input print statement
        # (np.nan is the canonical spelling; the np.NaN alias was removed
        # in NumPy 2.0)
        df.iloc[0, 0] = np.nan
        preprocessing.clean_and_space_equally_time_series(df, "1s")
        # Generate data with floating index and small frequency
        df = pd.DataFrame(np.random.randint(0, 100, size=(dim, 4)),
                          index=np.arange(0, dim * 0.01, 0.01), columns=list('ABCD'))
        df = preprocessing.convert_index_to_datetime_index(df)
        df_temp = preprocessing.clean_and_space_equally_time_series(df, "10ms")
        new_freq, new_freq_std = preprocessing.get_df_index_frequency_mean_and_std(df_temp.index)
        self.assertAlmostEqual(new_freq, 0.01, 6)
        self.assertAlmostEqual(new_freq_std, 0.0, 6)
        # Generate data with no frequency
        index = [i * 10 if i <= 50 else (i - 50) * 15 + 500 for i in range(100)]
        df = pd.DataFrame(np.random.randint(0, 100, size=(dim, 4)),
                          index=index, columns=list('ABCD'))
        df = preprocessing.convert_index_to_datetime_index(df, "min")
        # Trigger warning when desired frequency is outside the confidence
        # interval of a no frequency index
        with self.assertWarns(UserWarning):
            preprocessing.clean_and_space_equally_time_series(df, "10min")
123

124
    def test_low_pass_filter(self):
        """Test function of preprocessing.low_pass_filter().

        Feeds a random signal with a random critical frequency and filter
        order into the filter and checks the return type.
        For an example, see the doctest in the function."""
        # Randomly generate all inputs to assure that different
        # inputs will always work.
        # The lower bound keeps the signal clearly longer than the filter
        # order. NOTE(review): presumably the filter is applied with
        # scipy.signal.filtfilt, which rejects inputs that are not longer
        # than its padding length, so a length close to 1 would make this
        # test fail at random - confirm against the implementation.
        dim = np.random.randint(50, 10000)
        vals = np.random.rand(dim)
        freq = np.random.randint(1, 100) / 100
        order = np.random.randint(1, 5)
        output = preprocessing.low_pass_filter(vals, freq, order)
        self.assertIsInstance(output, np.ndarray)
135

136
    def test_moving_average(self):
        """Check preprocessing.moving_average() on a sine signal.

        A window of random size (at least 1, smaller than the signal) is
        used and only the type of the result is verified.
        See the doctest in the function for an example."""
        signal = np.sin(np.linspace(-30, 30, 1000))
        width = np.random.randint(1, len(signal))
        smoothed = preprocessing.moving_average(signal, width)
        self.assertIsInstance(smoothed, np.ndarray)
143

144
    def test_create_on_off_signal(self):
        """Check preprocessing.create_on_off_signal().

        Mismatching numbers of new column names or thresholds have to raise
        an IndexError; a valid call has to return a DataFrame that contains
        the new signal column.
        See the doctest in the function for an example."""
        empty_frame = pd.DataFrame()
        # More new names than source columns
        with self.assertRaises(IndexError):
            preprocessing.create_on_off_signal(
                empty_frame,
                col_names=["Dummy"],
                threshold=None,
                col_names_new=["One", "too much"],
            )
        # More thresholds than source columns
        with self.assertRaises(IndexError):
            preprocessing.create_on_off_signal(
                empty_frame,
                col_names=["Dummy"],
                threshold=[1, 2, 3, 4],
                col_names_new=["Dummy_signal"],
            )
        # Valid call on a scaled sine wave
        power = np.sin(np.linspace(-20, 20, 100)) * 100
        power_frame = pd.DataFrame({"dummy_P_el": power})
        result = preprocessing.create_on_off_signal(
            power_frame,
            col_names=["dummy_P_el"],
            threshold=25,
            col_names_new=["dummy_signal"],
        )
        self.assertIsInstance(result["dummy_signal"], pd.Series)
        self.assertIsInstance(result, pd.DataFrame)
164

165
    def test_number_lines_totally_na(self):
        """Test function of preprocessing.number_lines_totally_na().

        A frame consisting only of NaN has to report every row, a frame with
        one populated column has to report none, and a non-DataFrame input
        has to raise a TypeError.
        For an example, see the doctest in the function."""
        dim = np.random.randint(100)
        # np.nan is the canonical spelling; the np.NaN alias was removed
        # in NumPy 2.0.
        nan_col = [np.nan] * dim
        col = np.arange(dim)
        df_nan = pd.DataFrame({"col_1": nan_col, "col_2": nan_col})
        df_normal = pd.DataFrame({"col_1": nan_col, "col_2": col})
        self.assertEqual(preprocessing.number_lines_totally_na(df_nan), dim)
        self.assertEqual(preprocessing.number_lines_totally_na(df_normal), 0)
        # Test wrong input
        with self.assertRaises(TypeError):
            preprocessing.number_lines_totally_na("not_a_df")
178

179
    def test_z_score(self):
        """Check that preprocessing.z_score() returns a numpy array.

        The input is 1000 samples of a standard normal distribution and the
        limit is 2. See the doctest in the function for an example."""
        samples = np.random.normal(0, 1, 1000)
        self.assertIsInstance(preprocessing.z_score(samples, limit=2), np.ndarray)
185

186
    def test_modified_z_score(self):
        """Check that preprocessing.modified_z_score() returns a numpy array.

        The input is 1000 samples of a standard normal distribution and the
        limit is 2. See the doctest in the function for an example."""
        samples = np.random.normal(0, 1, 1000)
        self.assertIsInstance(preprocessing.modified_z_score(samples, limit=2),
                              np.ndarray)
192

193
    def test_interquartile_range(self):
        """Check that preprocessing.interquartile_range() returns a numpy array.

        The input is 1000 samples of a standard normal distribution.
        See the doctest in the function for an example."""
        samples = np.random.normal(0, 1, 1000)
        self.assertIsInstance(preprocessing.interquartile_range(samples), np.ndarray)
199

200
    def test_cross_validation(self):
        """Check the split returned by preprocessing.cross_validation().

        Two random vectors of equal length are split with a random test
        size. The result has to hold four parts, and the first/third as well
        as the second/fourth part have to be of equal length.
        See the doctest in the function for an example.
        """
        n_samples = np.random.randint(100, 1000)
        test_size = np.random.uniform(low=0.1, high=0.9, size=(1,))[0]
        x = np.random.rand(n_samples)
        y = np.random.rand(n_samples)
        splits = preprocessing.cross_validation(x, y, test_size=test_size)
        self.assertEqual(len(splits), 4)
        # Compare sizes of the parts that belong together
        for left, right in ((0, 2), (1, 3)):
            self.assertEqual(len(splits[left]), len(splits[right]))
213

214

215
# Run every test of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
217

218
Product

Resources

Company