CoCalc -- test_groupby

GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/unit_tests/preprocessors/test_groupby_scaler.py
728 views
1
from __future__ import annotations
2

3
import pandas as pd
4
import pytest
5
from sklearn.preprocessing import MinMaxScaler
6

7
from finrl.meta.preprocessor.preprocessors import GroupByScaler
8

9
# Shared input for every test in this module: six rows that alternate between
# tickers "A" and "B", so each ticker contributes three values per feature.
#   A: feature_1 = 5, 9, 0     feature_2 = 9, 7, 9
#   B: feature_1 = 3, 12, 5    feature_2 = 11, 3, 13
test_dataframe = pd.DataFrame(
    {
        "tic": ["A", "B", "A", "B", "A", "B"],
        "feature_1": [5.0, 3.0, 9.0, 12.0, 0.0, 5.0],
        "feature_2": [9.0, 11.0, 7.0, 3.0, 9.0, 13.0],
    }
)
16

17

18
def test_fit_transform():
    """Fit and transform the shared frame with the default scaler, grouped by "tic".

    The expected numbers equal each entry divided by the largest absolute
    value of its column within the same ticker (ticker A's ``feature_1``
    values 5, 9, 0 become 5/9, 1, 0).
    NOTE(review): this implies the default scaler is a max-abs scaler --
    confirm against ``GroupByScaler``.
    """
    scaler = GroupByScaler(by="tic")
    transformed_df = scaler.fit_transform(test_dataframe)
    # ``approx`` wraps the expected side so the tolerance is derived from the
    # reference values rather than from the output under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(
        [
            5 / 9,  # A: 5 / 9
            1 / 4,  # B: 3 / 12
            1.0,  # A: 9 / 9
            1.0,  # B: 12 / 12
            0.0,  # A: 0 / 9
            5 / 12,  # B: 5 / 12
        ]
    )
    assert transformed_df["feature_2"].tolist() == pytest.approx(
        [
            1.0,  # A: 9 / 9
            11 / 13,  # B: 11 / 13
            7 / 9,  # A: 7 / 9
            3 / 13,  # B: 3 / 13
            1.0,  # A: 9 / 9
            1.0,  # B: 13 / 13
        ]
    )
37

38

39
def test_fit_transform_specific_column():
    """Scale only ``feature_1`` and check that ``feature_2`` is left unchanged.

    ``columns=["feature_1"]`` restricts scaling to that column; the expected
    ``feature_2`` list is exactly the input data.
    """
    scaler = GroupByScaler(by="tic", columns=["feature_1"])
    transformed_df = scaler.fit_transform(test_dataframe)
    # ``approx`` wraps the expected side so the tolerance is derived from the
    # reference values rather than from the output under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(
        [
            5 / 9,  # A: 5 / 9
            1 / 4,  # B: 3 / 12
            1.0,  # A: 9 / 9
            1.0,  # B: 12 / 12
            0.0,  # A: 0 / 9
            5 / 12,  # B: 5 / 12
        ]
    )
    # Not listed in ``columns``: values must equal the original input.
    assert transformed_df["feature_2"].tolist() == pytest.approx(
        [
            9.0,
            11.0,
            7.0,
            3.0,
            9.0,
            13.0,
        ]
    )
58

59

60
def test_fit_transform_other_df():
    """Fit on the shared frame, then transform a different frame.

    The expected values reuse the per-ticker divisors derived from
    ``test_dataframe`` (A: 9 and 9, B: 12 and 13 for ``feature_1`` and
    ``feature_2``), not statistics of the new frame.
    NOTE(review): this implies the default scaler is a max-abs scaler --
    confirm against ``GroupByScaler``.
    """
    scaler = GroupByScaler(by="tic")
    scaler.fit(test_dataframe)
    another_dataframe = pd.DataFrame(
        {
            "tic": ["A", "B", "A", "B"],
            "feature_1": [7.0, 5.0, 8.0, 10.0],
            "feature_2": [1.0, 3.0, 2.0, 5.0],
        }
    )
    transformed_df = scaler.transform(another_dataframe)
    # ``approx`` wraps the expected side so the tolerance is derived from the
    # reference values rather than from the output under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(
        [
            7 / 9,  # A: 7 / 9
            5 / 12,  # B: 5 / 12
            8 / 9,  # A: 8 / 9
            5 / 6,  # B: 10 / 12
        ]
    )
    assert transformed_df["feature_2"].tolist() == pytest.approx(
        [
            1 / 9,  # A: 1 / 9
            3 / 13,  # B: 3 / 13
            2 / 9,  # A: 2 / 9
            5 / 13,  # B: 5 / 13
        ]
    )
83

84

85
def test_minmax_fit_transform():
    """Fit and transform the shared frame with ``MinMaxScaler`` per ticker.

    Each value is mapped to ``(x - min) / (max - min)`` using the minimum and
    maximum of its column within the same ticker:
    A: feature_1 in [0, 9], feature_2 in [7, 9];
    B: feature_1 in [3, 12], feature_2 in [3, 13].
    """
    scaler = GroupByScaler(by="tic", scaler=MinMaxScaler)
    transformed_df = scaler.fit_transform(test_dataframe)
    # ``approx`` wraps the expected side so the tolerance is derived from the
    # reference values rather than from the output under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(
        [
            5 / 9,  # A: (5 - 0) / 9
            0.0,  # B: (3 - 3) / 9
            1.0,  # A: (9 - 0) / 9
            1.0,  # B: (12 - 3) / 9
            0.0,  # A: (0 - 0) / 9
            2 / 9,  # B: (5 - 3) / 9
        ]
    )
    assert transformed_df["feature_2"].tolist() == pytest.approx(
        [
            1.0,  # A: (9 - 7) / 2
            4 / 5,  # B: (11 - 3) / 10
            0.0,  # A: (7 - 7) / 2
            0.0,  # B: (3 - 3) / 10
            1.0,  # A: (9 - 7) / 2
            1.0,  # B: (13 - 3) / 10
        ]
    )
104

105

106
def test_minmax_fit_transform_specific_column():
    """Min-max scale only ``feature_1`` and check ``feature_2`` is unchanged.

    ``columns=["feature_1"]`` restricts scaling to that column; the expected
    ``feature_2`` list is exactly the input data.
    """
    scaler = GroupByScaler(by="tic", scaler=MinMaxScaler, columns=["feature_1"])
    transformed_df = scaler.fit_transform(test_dataframe)
    # ``approx`` wraps the expected side so the tolerance is derived from the
    # reference values rather than from the output under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(
        [
            5 / 9,  # A: (5 - 0) / 9
            0.0,  # B: (3 - 3) / 9
            1.0,  # A: (9 - 0) / 9
            1.0,  # B: (12 - 3) / 9
            0.0,  # A: (0 - 0) / 9
            2 / 9,  # B: (5 - 3) / 9
        ]
    )
    # Not listed in ``columns``: values must equal the original input.
    assert transformed_df["feature_2"].tolist() == pytest.approx(
        [
            9.0,
            11.0,
            7.0,
            3.0,
            9.0,
            13.0,
        ]
    )
125

126

127
def test_minmax_fit_transform_other_df():
    """Fit ``MinMaxScaler`` per ticker on the shared frame, transform another.

    The per-ticker ranges come from ``test_dataframe``
    (A: feature_1 in [0, 9], feature_2 in [7, 9];
    B: feature_1 in [3, 12], feature_2 in [3, 13]).
    Values of the new frame that fall below a fitted minimum therefore map
    to negative numbers instead of being clipped to [0, 1].
    """
    scaler = GroupByScaler(by="tic", scaler=MinMaxScaler)
    scaler.fit(test_dataframe)
    another_dataframe = pd.DataFrame(
        {
            "tic": ["A", "B", "A", "B"],
            "feature_1": [7.0, 5.0, 8.0, 10.0],
            "feature_2": [1.0, 3.0, 2.0, 5.0],
        }
    )
    transformed_df = scaler.transform(another_dataframe)
    # ``approx`` wraps the expected side so the tolerance is derived from the
    # reference values rather than from the output under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(
        [
            7 / 9,  # A: (7 - 0) / 9
            2 / 9,  # B: (5 - 3) / 9
            8 / 9,  # A: (8 - 0) / 9
            7 / 9,  # B: (10 - 3) / 9
        ]
    )
    assert transformed_df["feature_2"].tolist() == pytest.approx(
        [
            -3.0,  # A: (1 - 7) / 2, below the fitted minimum
            0.0,  # B: (3 - 3) / 10
            -5 / 2,  # A: (2 - 7) / 2, below the fitted minimum
            0.2,  # B: (5 - 3) / 10
        ]
    )
150

151
Product

Resources

Company