# Path: blob/master/unit_tests/preprocessors/test_groupby_scaler.py
# 728 views
from __future__ import annotations12import pandas as pd3import pytest4from sklearn.preprocessing import MinMaxScaler56from finrl.meta.preprocessor.preprocessors import GroupByScaler78test_dataframe = pd.DataFrame(9{10"tic": ["A", "B", "A", "B", "A", "B"],11"feature_1": [5.0, 3.0, 9.0, 12.0, 0.0, 5.0],12"feature_2": [9.0, 11.0, 7.0, 3.0, 9.0, 13.0],13}14)151617def test_fit_transform():18scaler = GroupByScaler(by="tic")19transformed_df = scaler.fit_transform(test_dataframe)20assert pytest.approx(transformed_df["feature_1"].tolist()) == [215 / 9,221 / 4,231.0,241.0,250.0,265 / 12,27]28assert pytest.approx(transformed_df["feature_2"].tolist()) == [291.0,3011 / 13,317 / 9,323 / 13,331.0,341.0,35]363738def test_fit_transform_specific_column():39scaler = GroupByScaler(by="tic", columns=["feature_1"])40transformed_df = scaler.fit_transform(test_dataframe)41assert pytest.approx(transformed_df["feature_1"].tolist()) == [425 / 9,431 / 4,441.0,451.0,460.0,475 / 12,48]49assert pytest.approx(transformed_df["feature_2"].tolist()) == [509.0,5111.0,527.0,533.0,549.0,5513.0,56]575859def test_fit_transform_other_df():60scaler = GroupByScaler(by="tic")61scaler.fit(test_dataframe)62another_dataframe = pd.DataFrame(63{64"tic": ["A", "B", "A", "B"],65"feature_1": [7.0, 5.0, 8.0, 10.0],66"feature_2": [1.0, 3.0, 2.0, 5.0],67}68)69transformed_df = scaler.transform(another_dataframe)70assert pytest.approx(transformed_df["feature_1"].tolist()) == [717 / 9,725 / 12,738 / 9,745 / 6,75]76assert pytest.approx(transformed_df["feature_2"].tolist()) == [771 / 9,783 / 13,792 / 9,805 / 13,81]828384def test_minmax_fit_transform():85scaler = GroupByScaler(by="tic", scaler=MinMaxScaler)86transformed_df = scaler.fit_transform(test_dataframe)87assert pytest.approx(transformed_df["feature_1"].tolist()) == [885 / 9,890.0,901.0,911.0,920.0,932 / 9,94]95assert pytest.approx(transformed_df["feature_2"].tolist()) == [961.0,974 / 5,980.0,990.0,1001.0,1011.0,102]103104105def 
test_minmax_fit_transform_specific_column():106scaler = GroupByScaler(by="tic", scaler=MinMaxScaler, columns=["feature_1"])107transformed_df = scaler.fit_transform(test_dataframe)108assert pytest.approx(transformed_df["feature_1"].tolist()) == [1095 / 9,1100.0,1111.0,1121.0,1130.0,1142 / 9,115]116assert pytest.approx(transformed_df["feature_2"].tolist()) == [1179.0,11811.0,1197.0,1203.0,1219.0,12213.0,123]124125126def test_minmax_fit_transform_other_df():127scaler = GroupByScaler(by="tic", scaler=MinMaxScaler)128scaler.fit(test_dataframe)129another_dataframe = pd.DataFrame(130{131"tic": ["A", "B", "A", "B"],132"feature_1": [7.0, 5.0, 8.0, 10.0],133"feature_2": [1.0, 3.0, 2.0, 5.0],134}135)136transformed_df = scaler.transform(another_dataframe)137assert pytest.approx(transformed_df["feature_1"].tolist()) == [1387 / 9,1392 / 9,1408 / 9,1417 / 9,142]143assert pytest.approx(transformed_df["feature_2"].tolist()) == [144-3.0,1450.0,146-5 / 2,1470.2,148]149150151