Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
AI4Finance-Foundation
GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/unit_tests/preprocessors/test_groupby_scaler.py
728 views
1
from __future__ import annotations
2
3
import pandas as pd
4
import pytest
5
from sklearn.preprocessing import MinMaxScaler
6
7
from finrl.meta.preprocessor.preprocessors import GroupByScaler
8
9
# Shared fixture for the tests in this module: six rows alternating between
# the groups "A" and "B" in the "tic" column, with two numeric feature columns.
#   group A -> feature_1: 5, 9, 0    feature_2: 9, 7, 9
#   group B -> feature_1: 3, 12, 5   feature_2: 11, 3, 13
test_dataframe = pd.DataFrame(
    {
        "tic": ["A", "B", "A", "B", "A", "B"],
        "feature_1": [5.0, 3.0, 9.0, 12.0, 0.0, 5.0],
        "feature_2": [9.0, 11.0, 7.0, 3.0, 9.0, 13.0],
    }
)
16
17
18
def test_fit_transform():
    """Check ``fit_transform`` with the default scaler on every feature column.

    Each expected number is the entry of ``test_dataframe`` divided by the
    largest value of the same column inside its ``tic`` group
    (feature_1: A -> 9, B -> 12; feature_2: A -> 9, B -> 13).

    NOTE(review): this is what max-abs scaling per group would produce; the
    default scaler class used by ``GroupByScaler`` is not visible in this
    file -- confirm against the preprocessor module.
    """
    scaler = GroupByScaler(by="tic")
    transformed_df = scaler.fit_transform(test_dataframe)

    # Row order follows the fixture: A, B, A, B, A, B.
    expected_feature_1 = [5 / 9, 1 / 4, 1.0, 1.0, 0.0, 5 / 12]
    expected_feature_2 = [1.0, 11 / 13, 7 / 9, 3 / 13, 1.0, 1.0]

    # approx() wraps the expected side so the tolerance is derived from the
    # reference values rather than from the values under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(expected_feature_1)
    assert transformed_df["feature_2"].tolist() == pytest.approx(expected_feature_2)
37
38
39
def test_fit_transform_specific_column():
    """Check that ``columns=["feature_1"]`` restricts scaling to that column.

    ``feature_1`` is expected to be divided by its per-``tic`` maximum
    (A -> 9, B -> 12), while ``feature_2`` must come back with exactly the
    values it has in ``test_dataframe``.

    NOTE(review): the default scaler class used by ``GroupByScaler`` is not
    visible in this file; the expected numbers match max-abs scaling.
    """
    scaler = GroupByScaler(by="tic", columns=["feature_1"])
    transformed_df = scaler.fit_transform(test_dataframe)

    # Row order follows the fixture: A, B, A, B, A, B.
    expected_feature_1 = [5 / 9, 1 / 4, 1.0, 1.0, 0.0, 5 / 12]
    # Not listed in ``columns`` -> expected to be left untouched.
    expected_feature_2 = [9.0, 11.0, 7.0, 3.0, 9.0, 13.0]

    # approx() wraps the expected side so the tolerance is derived from the
    # reference values rather than from the values under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(expected_feature_1)
    assert transformed_df["feature_2"].tolist() == pytest.approx(expected_feature_2)
58
59
60
def test_fit_transform_other_df():
    """Check that a scaler fitted on one frame can transform another frame.

    The scaler is fitted on ``test_dataframe`` and then applied to a new
    four-row frame.  The expected numbers are the new entries divided by the
    per-``tic`` maxima of the *fitting* frame
    (feature_1: A -> 9, B -> 12; feature_2: A -> 9, B -> 13).

    NOTE(review): the default scaler class used by ``GroupByScaler`` is not
    visible in this file; the expected numbers match max-abs scaling.
    """
    scaler = GroupByScaler(by="tic")
    scaler.fit(test_dataframe)

    another_dataframe = pd.DataFrame(
        {
            "tic": ["A", "B", "A", "B"],
            "feature_1": [7.0, 5.0, 8.0, 10.0],
            "feature_2": [1.0, 3.0, 2.0, 5.0],
        }
    )
    transformed_df = scaler.transform(another_dataframe)

    # Row order of the new frame: A, B, A, B.
    expected_feature_1 = [7 / 9, 5 / 12, 8 / 9, 5 / 6]
    expected_feature_2 = [1 / 9, 3 / 13, 2 / 9, 5 / 13]

    # approx() wraps the expected side so the tolerance is derived from the
    # reference values rather than from the values under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(expected_feature_1)
    assert transformed_df["feature_2"].tolist() == pytest.approx(expected_feature_2)
83
84
85
def test_minmax_fit_transform():
    """Check ``fit_transform`` when ``MinMaxScaler`` is supplied as the scaler.

    Each expected number is ``(x - min) / (max - min)`` computed inside the
    entry's ``tic`` group of ``test_dataframe``:
    feature_1: A spans 0..9, B spans 3..12;
    feature_2: A spans 7..9, B spans 3..13.
    """
    scaler = GroupByScaler(by="tic", scaler=MinMaxScaler)
    transformed_df = scaler.fit_transform(test_dataframe)

    # Row order follows the fixture: A, B, A, B, A, B.
    expected_feature_1 = [5 / 9, 0.0, 1.0, 1.0, 0.0, 2 / 9]
    expected_feature_2 = [1.0, 4 / 5, 0.0, 0.0, 1.0, 1.0]

    # approx() wraps the expected side so the tolerance is derived from the
    # reference values rather than from the values under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(expected_feature_1)
    assert transformed_df["feature_2"].tolist() == pytest.approx(expected_feature_2)
104
105
106
def test_minmax_fit_transform_specific_column():
    """Check ``MinMaxScaler`` scaling restricted to ``columns=["feature_1"]``.

    ``feature_1`` is expected as ``(x - min) / (max - min)`` inside each
    ``tic`` group (A spans 0..9, B spans 3..12), while ``feature_2`` must come
    back with exactly the values it has in ``test_dataframe``.
    """
    scaler = GroupByScaler(by="tic", scaler=MinMaxScaler, columns=["feature_1"])
    transformed_df = scaler.fit_transform(test_dataframe)

    # Row order follows the fixture: A, B, A, B, A, B.
    expected_feature_1 = [5 / 9, 0.0, 1.0, 1.0, 0.0, 2 / 9]
    # Not listed in ``columns`` -> expected to be left untouched.
    expected_feature_2 = [9.0, 11.0, 7.0, 3.0, 9.0, 13.0]

    # approx() wraps the expected side so the tolerance is derived from the
    # reference values rather than from the values under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(expected_feature_1)
    assert transformed_df["feature_2"].tolist() == pytest.approx(expected_feature_2)
125
126
127
def test_minmax_fit_transform_other_df():
    """Check that a fitted ``MinMaxScaler`` group scaler transforms a new frame.

    The scaler is fitted on ``test_dataframe`` and applied to a new four-row
    frame.  The expected numbers use the per-``tic`` min/max of the *fitting*
    frame (feature_1: A 0..9, B 3..12; feature_2: A 7..9, B 3..13), so results
    can fall outside ``[0, 1]`` when the new values lie outside that range.
    """
    scaler = GroupByScaler(by="tic", scaler=MinMaxScaler)
    scaler.fit(test_dataframe)

    another_dataframe = pd.DataFrame(
        {
            "tic": ["A", "B", "A", "B"],
            "feature_1": [7.0, 5.0, 8.0, 10.0],
            "feature_2": [1.0, 3.0, 2.0, 5.0],
        }
    )
    transformed_df = scaler.transform(another_dataframe)

    # Row order of the new frame: A, B, A, B.
    expected_feature_1 = [7 / 9, 2 / 9, 8 / 9, 7 / 9]
    # Group A values 1 and 2 sit below the fitted minimum of 7, hence the
    # negative results: (1 - 7) / 2 and (2 - 7) / 2.
    expected_feature_2 = [-3.0, 0.0, -5 / 2, 0.2]

    # approx() wraps the expected side so the tolerance is derived from the
    # reference values rather than from the values under test.
    assert transformed_df["feature_1"].tolist() == pytest.approx(expected_feature_1)
    assert transformed_df["feature_2"].tolist() == pytest.approx(expected_feature_2)
150
151