GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/agents/elegantrl/models.py
"""
DRL models from ElegantRL: https://github.com/AI4Finance-Foundation/ElegantRL
"""

from __future__ import annotations

import torch
from elegantrl.agents import *
from elegantrl.train.config import Config
from elegantrl.train.run import train_agent

MODELS = {
    "ddpg": AgentDDPG,
    "td3": AgentTD3,
    "sac": AgentSAC,
    "ppo": AgentPPO,
    "a2c": AgentA2C,
}
OFF_POLICY_MODELS = ["ddpg", "td3", "sac"]
ON_POLICY_MODELS = ["ppo"]
# MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}
#
# NOISE = {
#     "normal": NormalActionNoise,
#     "ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,
# }

class DRLAgent:
    """Implementations of DRL algorithms from ElegantRL.

    Attributes
    ----------
    env : gym environment class
        user-defined environment class

    Methods
    -------
    get_model()
        set up a DRL algorithm
    train_model()
        train a DRL algorithm on a training dataset
        and output the trained model
    DRL_prediction()
        make predictions on a test dataset and return the results
        (a usage sketch is given at the end of this file)
    """

    def __init__(self, env, price_array, tech_array, turbulence_array):
        self.env = env
        self.price_array = price_array
        self.tech_array = tech_array
        self.turbulence_array = turbulence_array

    def get_model(self, model_name, model_kwargs):
        self.env_config = {
            "price_array": self.price_array,
            "tech_array": self.tech_array,
            "turbulence_array": self.turbulence_array,
            "if_train": True,
        }
        self.model_kwargs = model_kwargs
        self.gamma = (model_kwargs or {}).get("gamma", 0.985)

        env = self.env
        env.env_num = 1
        if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported.")
        agent = MODELS[model_name]

        stock_dim = self.price_array.shape[1]
        # state: 1 (cash) + 2 (turbulence value and flag) + 3 * stock_dim (price, shares, cooldown) + technical indicators
        self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_array.shape[1]
        self.action_dim = stock_dim
        self.env_args = {
            "env_name": "StockEnv",
            "config": self.env_config,
            "state_dim": self.state_dim,
            "action_dim": self.action_dim,
            "if_discrete": False,
            "max_step": self.price_array.shape[0] - 1,
        }

        model = Config(agent_class=agent, env_class=env, env_args=self.env_args)
        model.if_off_policy = model_name in OFF_POLICY_MODELS
        if model_kwargs is not None:
            try:
                model.break_step = int(2e5)  # stop training once 'total_step > break_step'
                model.net_dims = (128, 64)  # hidden layer dimensions of the MLP
                model.gamma = self.gamma  # discount factor of future rewards
                model.horizon_len = model.max_step
                model.repeat_times = 16  # repeatedly update the network using the ReplayBuffer to keep the critic's loss small
                model.learning_rate = model_kwargs.get("learning_rate", 1e-4)
                model.state_value_tau = 0.1  # soft-update coefficient (tau) for state/value normalization
                model.eval_times = model_kwargs.get("eval_times", 2**5)
                model.eval_per_step = int(2e4)
            except Exception as e:
                raise ValueError(
                    "Failed to read arguments, please check the 'model_kwargs' input."
                ) from e
        return model

    def train_model(self, model, cwd, total_timesteps=5000):
        model.cwd = cwd
        model.break_step = total_timesteps  # overrides the break_step set in get_model
        train_agent(model)

    @staticmethod
    def DRL_prediction(model_name, cwd, net_dimension, environment, env_args):
        gpu_id = 0  # >= 0 selects a GPU ID, -1 selects the CPU
        agent_class = MODELS[model_name]
        stock_dim = env_args["price_array"].shape[1]
        # state: 1 (cash) + 2 (turbulence value and flag) + 3 * stock_dim (price, shares, cooldown) + technical indicators
        state_dim = 1 + 2 + 3 * stock_dim + env_args["tech_array"].shape[1]
        action_dim = stock_dim
        env_args = {
            "env_num": 1,
            "env_name": "StockEnv",
            "state_dim": state_dim,
            "action_dim": action_dim,
            "if_discrete": False,
            "max_step": env_args["price_array"].shape[0] - 1,
            "config": env_args,
        }

        actor_path = f"{cwd}/act.pth"
        net_dim = [2**7]  # note: the net_dimension argument is not used here; the hidden width is fixed at 128

        # init
        env = environment
        env_class = env
        args = Config(agent_class=agent_class, env_class=env_class, env_args=env_args)
        args.cwd = cwd
        # build an actor with the expected architecture, then replace it with the saved actor module
        act = agent_class(
            net_dim, env.state_dim, env.action_dim, gpu_id=gpu_id, args=args
        ).act
        act = torch.load(actor_path)
        # copy the loaded parameters through detached CPU tensors and reload them into the actor
        parameters_dict = {}
        for name, param in act.named_parameters():
            parameters_dict[name] = torch.tensor(param.detach().cpu().numpy())
        act.load_state_dict(parameters_dict)

        if_discrete = env.if_discrete
        device = next(act.parameters()).device
        state = env.reset()
        episode_returns = []  # cumulative return relative to the initial account value
        episode_total_assets = [env.initial_total_asset]
        max_step = env.max_step
        for steps in range(max_step):
            s_tensor = torch.as_tensor(
                state, dtype=torch.float32, device=device
            ).unsqueeze(0)
            a_tensor = act(s_tensor).argmax(dim=1) if if_discrete else act(s_tensor)
            action = a_tensor.detach().cpu().numpy()[0]  # detach and move to CPU before converting to numpy
            state, reward, done, _ = env.step(action)
            total_asset = env.amount + (env.price_ary[env.day] * env.stocks).sum()
            episode_total_assets.append(total_asset)
            episode_return = total_asset / env.initial_total_asset
            episode_returns.append(episode_return)
            if done:
                break
        print("Test Finished!")
        print("episode_return", episode_return)
        return episode_total_assets
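

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal end-to-end example
# of get_model() -> train_model() -> DRL_prediction(), assuming the numpy-based
# StockTradingEnv from FinRL (import path and constructor signature are
# assumptions; adapt to your installed FinRL version) and synthetic demo data.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    # assumed import; the environment only needs to accept a `config` dict and
    # expose state_dim / action_dim / if_discrete / max_step / reset() / step()
    from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv

    n_days, n_stocks, n_tech = 200, 5, 10
    price_array = np.random.rand(n_days, n_stocks) * 100 + 1  # synthetic prices
    tech_array = np.random.rand(n_days, n_tech)  # synthetic technical indicators
    turbulence_array = np.random.rand(n_days)  # synthetic turbulence index

    agent = DRLAgent(
        env=StockTradingEnv,
        price_array=price_array,
        tech_array=tech_array,
        turbulence_array=turbulence_array,
    )

    # build and train a PPO model; cwd is where ElegantRL saves checkpoints (act.pth)
    model = agent.get_model("ppo", model_kwargs={"learning_rate": 1e-4})
    agent.train_model(model=model, cwd="./test_ppo", total_timesteps=int(1e4))

    # back-test with the trained actor on an environment instance built for testing
    env_config = {
        "price_array": price_array,
        "tech_array": tech_array,
        "turbulence_array": turbulence_array,
        "if_train": False,
    }
    test_env = StockTradingEnv(config=env_config)
    episode_total_assets = DRLAgent.DRL_prediction(
        model_name="ppo",
        cwd="./test_ppo",
        net_dimension=2**7,
        environment=test_env,
        env_args=env_config,
    )
    print("final total asset:", episode_total_assets[-1])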