Path: finrl/agents/elegantrl/models.py
"""1DRL models from ElegantRL: https://github.com/AI4Finance-Foundation/ElegantRL2"""34from __future__ import annotations56import torch7from elegantrl.agents import *8from elegantrl.train.config import Config9from elegantrl.train.run import train_agent1011MODELS = {12"ddpg": AgentDDPG,13"td3": AgentTD3,14"sac": AgentSAC,15"ppo": AgentPPO,16"a2c": AgentA2C,17}18OFF_POLICY_MODELS = ["ddpg", "td3", "sac"]19ON_POLICY_MODELS = ["ppo"]20# MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}21#22# NOISE = {23# "normal": NormalActionNoise,24# "ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,25# }262728class DRLAgent:29"""Implementations of DRL algorithms30Attributes31----------32env: gym environment class33user-defined class34Methods35-------36get_model()37setup DRL algorithms38train_model()39train DRL algorithms in a train dataset40and output the trained model41DRL_prediction()42make a prediction in a test dataset and get results43"""4445def __init__(self, env, price_array, tech_array, turbulence_array):46self.env = env47self.price_array = price_array48self.tech_array = tech_array49self.turbulence_array = turbulence_array5051def get_model(self, model_name, model_kwargs):52self.env_config = {53"price_array": self.price_array,54"tech_array": self.tech_array,55"turbulence_array": self.turbulence_array,56"if_train": True,57}58self.model_kwargs = model_kwargs59self.gamma = model_kwargs.get("gamma", 0.985)6061env = self.env62env.env_num = 163agent = MODELS[model_name]64if model_name not in MODELS:65raise NotImplementedError("NotImplementedError")6667stock_dim = self.price_array.shape[1]68self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_array.shape[1]69self.action_dim = stock_dim70self.env_args = {71"env_name": "StockEnv",72"config": self.env_config,73"state_dim": self.state_dim,74"action_dim": self.action_dim,75"if_discrete": False,76"max_step": self.price_array.shape[0] - 1,77}7879model = Config(agent_class=agent, env_class=env, env_args=self.env_args)80model.if_off_policy = model_name in OFF_POLICY_MODELS81if model_kwargs is not None:82try:83model.break_step = int(842e585) # break training if 'total_step > break_step'86model.net_dims = (87128,8864,89) # the middle layer dimension of MultiLayer Perceptron90model.gamma = self.gamma # discount factor of future rewards91model.horizon_len = model.max_step92model.repeat_times = 16 # repeatedly update network using ReplayBuffer to keep critic's loss small93model.learning_rate = model_kwargs.get("learning_rate", 1e-4)94model.state_value_tau = 0.1 # the tau of normalize for value and state `std = (1-std)*std + tau*std`95model.eval_times = model_kwargs.get("eval_times", 2**5)96model.eval_per_step = int(2e4)97except BaseException:98raise ValueError(99"Fail to read arguments, please check 'model_kwargs' input."100)101return model102103def train_model(self, model, cwd, total_timesteps=5000):104model.cwd = cwd105model.break_step = total_timesteps106train_agent(model)107108@staticmethod109def DRL_prediction(model_name, cwd, net_dimension, environment, env_args):110import torch111112gpu_id = 0 # >=0 means GPU ID, -1 means CPU113agent_class = MODELS[model_name]114stock_dim = env_args["price_array"].shape[1]115state_dim = 1 + 2 + 3 * stock_dim + env_args["tech_array"].shape[1]116action_dim = stock_dim117env_args = {118"env_num": 1,119"env_name": "StockEnv",120"state_dim": state_dim,121"action_dim": action_dim,122"if_discrete": False,123"max_step": env_args["price_array"].shape[0] - 1,124"config": env_args,125}126127actor_path = 
f"{cwd}/act.pth"128net_dim = [2**7]129130"""init"""131env = environment132env_class = env133args = Config(agent_class=agent_class, env_class=env_class, env_args=env_args)134args.cwd = cwd135act = agent_class(136net_dim, env.state_dim, env.action_dim, gpu_id=gpu_id, args=args137).act138parameters_dict = {}139act = torch.load(actor_path)140for name, param in act.named_parameters():141parameters_dict[name] = torch.tensor(param.detach().cpu().numpy())142143act.load_state_dict(parameters_dict)144145if_discrete = env.if_discrete146device = next(act.parameters()).device147state = env.reset()148episode_returns = [] # the cumulative_return / initial_account149episode_total_assets = [env.initial_total_asset]150max_step = env.max_step151for steps in range(max_step):152s_tensor = torch.as_tensor(153state, dtype=torch.float32, device=device154).unsqueeze(0)155a_tensor = act(s_tensor).argmax(dim=1) if if_discrete else act(s_tensor)156action = (157a_tensor.detach().cpu().numpy()[0]158) # not need detach(), because using torch.no_grad() outside159state, reward, done, _ = env.step(action)160total_asset = env.amount + (env.price_ary[env.day] * env.stocks).sum()161episode_total_assets.append(total_asset)162episode_return = total_asset / env.initial_total_asset163episode_returns.append(episode_return)164if done:165break166print("Test Finished!")167print("episode_retuen", episode_return)168return episode_total_assets169170171