# Source: AI4Finance-Foundation/FinRL
# Path: blob/master/finrl/meta/paper_trading/common.py
# Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing.
# -----------------------------------------------------------------------------------------------------------------------------------------
# Import related modules
from __future__ import annotations

import os
import time

import gym
import numpy as np
import torch
import torch.nn as nn
from torch import Tensor
from torch.distributions.normal import Normal


# -----------------------------------------------------------------------------------------------------------------------------------------
# PPO


class ActorPPO(nn.Module):
    def __init__(self, dims: list[int], state_dim: int, action_dim: int):
        super().__init__()
        self.net = build_mlp(dims=[state_dim, *dims, action_dim])
        self.action_std_log = nn.Parameter(
            torch.zeros((1, action_dim)), requires_grad=True
        )  # trainable parameter

    def forward(self, state: Tensor) -> Tensor:
        return self.net(state).tanh()  # action.tanh()

    def get_action(self, state: Tensor) -> tuple[Tensor, Tensor]:  # for exploration
        action_avg = self.net(state)
        action_std = self.action_std_log.exp()

        dist = Normal(action_avg, action_std)
        action = dist.sample()
        logprob = dist.log_prob(action).sum(1)
        return action, logprob

    def get_logprob_entropy(self, state: Tensor, action: Tensor) -> tuple[Tensor, Tensor]:
        action_avg = self.net(state)
        action_std = self.action_std_log.exp()

        dist = Normal(action_avg, action_std)
        logprob = dist.log_prob(action).sum(1)
        entropy = dist.entropy().sum(1)
        return logprob, entropy

    @staticmethod
    def convert_action_for_env(action: Tensor) -> Tensor:
        return action.tanh()


class CriticPPO(nn.Module):
    def __init__(self, dims: list[int], state_dim: int, _action_dim: int):
        super().__init__()
        self.net = build_mlp(dims=[state_dim, *dims, 1])

    def forward(self, state: Tensor) -> Tensor:
        return self.net(state)  # advantage value


def build_mlp(dims: list[int]) -> nn.Sequential:  # MLP (MultiLayer Perceptron)
    net_list = []
    for i in range(len(dims) - 1):
        net_list.extend([nn.Linear(dims[i], dims[i + 1]), nn.ReLU()])
    del net_list[-1]  # remove the activation of the output layer
    return nn.Sequential(*net_list)


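# For reference (comment only): build_mlp(dims=[3, 64, 32, 1]) produces
#     nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1))
# i.e. ReLU between hidden layers and no activation on the output layer. ActorPPO and
# CriticPPO above request dims=[state_dim, *net_dims, action_dim] and dims=[state_dim, *net_dims, 1].

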
class Config:
    def __init__(self, agent_class=None, env_class=None, env_args=None):
        self.env_class = env_class  # env = env_class(**env_args)
        self.env_args = env_args  # env = env_class(**env_args)

        if env_args is None:  # dummy env_args
            env_args = {
                "env_name": None,
                "state_dim": None,
                "action_dim": None,
                "if_discrete": None,
            }
        self.env_name = env_args["env_name"]  # the name of the environment; used to set 'cwd'
        self.state_dim = env_args["state_dim"]  # vector dimension (feature number) of the state
        self.action_dim = env_args["action_dim"]  # vector dimension (feature number) of the action
        self.if_discrete = env_args["if_discrete"]  # discrete or continuous action space

        self.agent_class = agent_class  # agent = agent_class(...)

        """Arguments for reward shaping"""
        self.gamma = 0.99  # discount factor of future rewards
        self.reward_scale = 1.0  # reward scaling factor; the scaled episode return is usually kept close to 256

        """Arguments for training"""
        self.gpu_id = int(0)  # `int` means the ID of a single GPU, -1 means CPU
        self.net_dims = (64, 32)  # the hidden layer dimensions of the MLP (MultiLayer Perceptron)
        self.learning_rate = 6e-5  # 2 ** -14 ~= 6e-5
        self.soft_update_tau = 5e-3  # 2 ** -8 ~= 5e-3
        self.batch_size = int(128)  # number of transitions sampled from the replay buffer
        self.horizon_len = int(2000)  # collect horizon_len steps while exploring, then update the network
        self.buffer_size = None  # ReplayBuffer size. Empty the ReplayBuffer for on-policy algorithms.
        self.repeat_times = 8.0  # repeatedly update the network using the ReplayBuffer to keep the critic's loss small

        """Arguments for evaluation"""
        self.cwd = None  # current working directory to save the model. None means it is set automatically.
        self.break_step = +np.inf  # break training if 'total_step > break_step'
        self.eval_times = int(32)  # number of episodes used to estimate the cumulative return
        self.eval_per_step = int(2e4)  # evaluate the agent every `eval_per_step` training steps

    def init_before_training(self):
        if self.cwd is None:  # set cwd (current working directory) for saving the model
            self.cwd = f"./{self.env_name}_{self.agent_class.__name__[5:]}"
        os.makedirs(self.cwd, exist_ok=True)


def get_gym_env_args(env, if_print: bool) -> dict:
    if {"unwrapped", "observation_space", "action_space", "spec"}.issubset(
        dir(env)
    ):  # isinstance(env, gym.Env):
        env_name = env.unwrapped.spec.id
        state_shape = env.observation_space.shape
        state_dim = (
            state_shape[0] if len(state_shape) == 1 else state_shape
        )  # sometimes state_dim is a list

        if_discrete = isinstance(env.action_space, gym.spaces.Discrete)
        if if_discrete:  # make sure it is a discrete action space
            action_dim = env.action_space.n
        elif isinstance(
            env.action_space, gym.spaces.Box
        ):  # make sure it is a continuous action space
            action_dim = env.action_space.shape[0]

    env_args = {
        "env_name": env_name,
        "state_dim": state_dim,
        "action_dim": action_dim,
        "if_discrete": if_discrete,
    }
    if if_print:
        print(f"env_args = {repr(env_args)}")
    return env_args


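# A minimal usage sketch (comment only, not executed here): for the classic
# Pendulum environment the call below is expected to print the dict shown,
# assuming a standard gym installation.
#
# env_args = get_gym_env_args(gym.make("Pendulum-v1"), if_print=True)
# # env_args = {'env_name': 'Pendulum-v1', 'state_dim': 3, 'action_dim': 1, 'if_discrete': False}

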
def kwargs_filter(function, kwargs: dict) -> dict:
    import inspect

    sign = inspect.signature(function).parameters.values()
    sign = {val.name for val in sign}
    common_args = sign.intersection(kwargs.keys())
    return {key: kwargs[key] for key in common_args}  # filtered kwargs


def build_env(env_class=None, env_args=None):
    if env_class.__module__ == "gym.envs.registration":  # special rule
        env = env_class(id=env_args["env_name"])
    else:
        env = env_class(**kwargs_filter(env_class.__init__, env_args.copy()))
    for attr_str in ("env_name", "state_dim", "action_dim", "if_discrete"):
        setattr(env, attr_str, env_args[attr_str])
    return env


class AgentBase:
    def __init__(
        self,
        net_dims: list[int],
        state_dim: int,
        action_dim: int,
        gpu_id: int = 0,
        args: Config = Config(),
    ):
        self.state_dim = state_dim
        self.action_dim = action_dim

        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.repeat_times = args.repeat_times
        self.reward_scale = args.reward_scale
        self.soft_update_tau = args.soft_update_tau

        self.states = None  # assert self.states.shape == (1, state_dim)
        self.device = torch.device(
            f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu"
        )

        act_class = getattr(self, "act_class", None)
        cri_class = getattr(self, "cri_class", None)
        self.act = self.act_target = act_class(net_dims, state_dim, action_dim).to(
            self.device
        )
        self.cri = self.cri_target = (
            cri_class(net_dims, state_dim, action_dim).to(self.device)
            if cri_class
            else self.act
        )

        self.act_optimizer = torch.optim.Adam(self.act.parameters(), args.learning_rate)
        self.cri_optimizer = (
            torch.optim.Adam(self.cri.parameters(), args.learning_rate)
            if cri_class
            else self.act_optimizer
        )

        self.criterion = torch.nn.SmoothL1Loss()

    @staticmethod
    def optimizer_update(optimizer, objective: Tensor):
        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

    @staticmethod
    def soft_update(
        target_net: torch.nn.Module, current_net: torch.nn.Module, tau: float
    ):
        for tar, cur in zip(target_net.parameters(), current_net.parameters()):
            tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))


class AgentPPO(AgentBase):
    def __init__(
        self,
        net_dims: list[int],
        state_dim: int,
        action_dim: int,
        gpu_id: int = 0,
        args: Config = Config(),
    ):
        self.if_off_policy = False
        self.act_class = getattr(self, "act_class", ActorPPO)
        self.cri_class = getattr(self, "cri_class", CriticPPO)
        AgentBase.__init__(self, net_dims, state_dim, action_dim, gpu_id, args)

        self.ratio_clip = getattr(
            args, "ratio_clip", 0.25
        )  # `ratio.clamp(1 - clip, 1 + clip)`
        self.lambda_gae_adv = getattr(
            args, "lambda_gae_adv", 0.95
        )  # could be 0.80~0.99
        self.lambda_entropy = getattr(
            args, "lambda_entropy", 0.01
        )  # could be 0.00~0.10
        self.lambda_entropy = torch.tensor(
            self.lambda_entropy, dtype=torch.float32, device=self.device
        )

    def explore_env(self, env, horizon_len: int) -> tuple[Tensor, ...]:
        states = torch.zeros((horizon_len, self.state_dim), dtype=torch.float32).to(
            self.device
        )
        actions = torch.zeros((horizon_len, self.action_dim), dtype=torch.float32).to(
            self.device
        )
        logprobs = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)
        rewards = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)
        dones = torch.zeros(horizon_len, dtype=torch.bool).to(self.device)

        ary_state = self.states[0]

        get_action = self.act.get_action
        convert = self.act.convert_action_for_env
        for i in range(horizon_len):
            state = torch.as_tensor(ary_state, dtype=torch.float32, device=self.device)
            action, logprob = (t.squeeze(0) for t in get_action(state.unsqueeze(0))[:2])

            ary_action = convert(action).detach().cpu().numpy()
            ary_state, reward, done, _ = env.step(ary_action)
            if done:
                obs = env.reset()
                if isinstance(obs, tuple):  # new gym API returns (obs, info)
                    obs = obs[0]
                ary_state = obs

            states[i] = state
            actions[i] = action
            logprobs[i] = logprob
            rewards[i] = reward
            dones[i] = done

        self.states[0] = ary_state
        rewards = (rewards * self.reward_scale).unsqueeze(1)
        undones = (1 - dones.type(torch.float32)).unsqueeze(1)
        return states, actions, logprobs, rewards, undones

    def update_net(self, buffer) -> tuple[float, float, float]:
        with torch.no_grad():
            states, actions, logprobs, rewards, undones = buffer
            buffer_size = states.shape[0]

            """get advantages and reward_sums"""
            bs = 2**10  # set a smaller 'batch_size' when out of GPU memory
            values = [self.cri(states[i : i + bs]) for i in range(0, buffer_size, bs)]
            values = torch.cat(values, dim=0).squeeze(1)  # values.shape == (buffer_size,)

            advantages = self.get_advantages(
                rewards, undones, values
            )  # advantages.shape == (buffer_size,)
            reward_sums = advantages + values  # reward_sums.shape == (buffer_size,)
            del rewards, undones, values

            advantages = (advantages - advantages.mean()) / (
                advantages.std(dim=0) + 1e-5
            )
        assert logprobs.shape == advantages.shape == reward_sums.shape == (buffer_size,)

        """update network"""
        obj_critics = 0.0
        obj_actors = 0.0

        update_times = int(buffer_size * self.repeat_times / self.batch_size)
        assert update_times >= 1
        for _ in range(update_times):
            indices = torch.randint(
                buffer_size, size=(self.batch_size,), requires_grad=False
            )
            state = states[indices]
            action = actions[indices]
            logprob = logprobs[indices]
            advantage = advantages[indices]
            reward_sum = reward_sums[indices]

            value = self.cri(state).squeeze(
                1
            )  # the critic network predicts the reward_sum (Q value) of the state
            obj_critic = self.criterion(value, reward_sum)
            self.optimizer_update(self.cri_optimizer, obj_critic)

            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)
            ratio = (new_logprob - logprob.detach()).exp()
            surrogate1 = advantage * ratio
            surrogate2 = advantage * ratio.clamp(
                1 - self.ratio_clip, 1 + self.ratio_clip
            )
            obj_surrogate = torch.min(surrogate1, surrogate2).mean()

            obj_actor = obj_surrogate + obj_entropy.mean() * self.lambda_entropy
            self.optimizer_update(self.act_optimizer, -obj_actor)

            obj_critics += obj_critic.item()
            obj_actors += obj_actor.item()
        a_std_log = getattr(self.act, "action_std_log", torch.zeros(1)).mean()
        return obj_critics / update_times, obj_actors / update_times, a_std_log.item()

    def get_advantages(
        self, rewards: Tensor, undones: Tensor, values: Tensor
    ) -> Tensor:
        advantages = torch.empty_like(values)  # advantage value

        masks = undones * self.gamma
        horizon_len = rewards.shape[0]

        next_state = torch.tensor(self.states, dtype=torch.float32).to(self.device)
        next_value = self.cri(next_state).detach()[0, 0]

        advantage = 0  # last_gae_lambda
        for t in range(horizon_len - 1, -1, -1):
            delta = rewards[t] + masks[t] * next_value - values[t]
            advantages[t] = advantage = (
                delta + masks[t] * self.lambda_gae_adv * advantage
            )
            next_value = values[t]
        return advantages


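# For reference, `get_advantages` above implements Generalized Advantage
# Estimation (GAE). With value estimates V(s_t), the backward recursion is:
#
#     delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
#     A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
#
# where gamma is `self.gamma` and lambda is `self.lambda_gae_adv`. The critic
# targets used in `update_net` are then reward_sums = A_t + V(s_t).

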
class PendulumEnv(gym.Wrapper):  # a demo of a custom gym env
    def __init__(self):
        gym.logger.set_level(40)  # block warnings
        gym_env_name = "Pendulum-v0" if gym.__version__ < "0.18.0" else "Pendulum-v1"
        super().__init__(env=gym.make(gym_env_name))

        """the necessary env information when you design a custom env"""
        self.env_name = gym_env_name  # the name of this env
        self.state_dim = self.observation_space.shape[0]  # feature number of the state
        self.action_dim = self.action_space.shape[0]  # feature number of the action
        self.if_discrete = False  # discrete or continuous action

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ) -> np.ndarray:  # reset the agent in the env
        obs = self.env.reset()
        if isinstance(obs, tuple):  # new gym API returns (obs, info)
            obs = obs[0]
        return obs

    def step(
        self, action: np.ndarray
    ) -> tuple[np.ndarray, float, bool, dict]:  # the agent interacts with the env
        # We suggest adjusting the action space to (-1, +1) when designing a custom env.
        state, reward, done, info_dict = self.env.step(action * 2)
        return state.reshape(self.state_dim), float(reward), done, info_dict


def train_agent(args: Config):
    args.init_before_training()

    env = build_env(args.env_class, args.env_args)
    agent = args.agent_class(
        args.net_dims, args.state_dim, args.action_dim, gpu_id=args.gpu_id, args=args
    )
    obs = env.reset()
    if isinstance(obs, tuple):  # new gym API returns (obs, info)
        obs = obs[0]
    agent.states = obs[np.newaxis, :]

    evaluator = Evaluator(
        eval_env=build_env(args.env_class, args.env_args),
        eval_per_step=args.eval_per_step,
        eval_times=args.eval_times,
        cwd=args.cwd,
    )
    torch.set_grad_enabled(False)
    while True:  # start training
        buffer_items = agent.explore_env(env, args.horizon_len)

        torch.set_grad_enabled(True)
        logging_tuple = agent.update_net(buffer_items)
        torch.set_grad_enabled(False)

        evaluator.evaluate_and_save(agent.act, args.horizon_len, logging_tuple)
        if (evaluator.total_step > args.break_step) or os.path.exists(
            f"{args.cwd}/stop"
        ):
            torch.save(agent.act.state_dict(), args.cwd + "/actor.pth")
            break  # stop training when reaching `break_step` or after `mkdir cwd/stop`


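# A minimal end-to-end training sketch (comment only, not executed here),
# assuming the PendulumEnv demo above; the dimensions are the standard
# Pendulum values and `break_step` is an arbitrary illustrative budget.
#
# env_args = {
#     "env_name": "Pendulum-v1",
#     "state_dim": 3,
#     "action_dim": 1,
#     "if_discrete": False,
# }
# args = Config(agent_class=AgentPPO, env_class=PendulumEnv, env_args=env_args)
# args.break_step = int(2e5)
# train_agent(args)  # saves the actor to ./Pendulum-v1_PPO/actor.pth

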
def render_agent(
    env_class,
    env_args: dict,
    net_dims: list[int],
    agent_class,
    actor_path: str,
    render_times: int = 8,
):
    env = build_env(env_class, env_args)

    state_dim = env_args["state_dim"]
    action_dim = env_args["action_dim"]
    agent = agent_class(net_dims, state_dim, action_dim, gpu_id=-1)
    actor = agent.act

    print(f"| render and load actor from: {actor_path}")
    actor.load_state_dict(
        torch.load(actor_path, map_location=lambda storage, loc: storage)
    )
    for i in range(render_times):
        cumulative_reward, episode_step = get_rewards_and_steps(
            env, actor, if_render=True
        )
        print(
            f"|{i:4} cumulative_reward {cumulative_reward:9.3f} episode_step {episode_step:5.0f}"
        )


class Evaluator:
    def __init__(
        self, eval_env, eval_per_step: int = int(1e4), eval_times: int = 8, cwd: str = "."
    ):
        self.cwd = cwd
        self.env_eval = eval_env
        self.eval_step = 0
        self.total_step = 0
        self.start_time = time.time()
        self.eval_times = eval_times  # number of episodes used to estimate the cumulative return
        self.eval_per_step = eval_per_step  # evaluate the agent every `eval_per_step` training steps

        self.recorder = []
        print(
            f"\n| `step`: Number of samples, or total training steps, or running times of `env.step()`."
            f"\n| `time`: Time spent from the start of training to this moment."
            f"\n| `avgR`: Average value of cumulative rewards, which is the sum of rewards in an episode."
            f"\n| `stdR`: Standard deviation of cumulative rewards, which is the sum of rewards in an episode."
            f"\n| `avgS`: Average number of steps in an episode."
            f"\n| `objC`: Objective of the Critic network, i.e. the loss function of the critic network."
            f"\n| `objA`: Objective of the Actor network; it is the average Q value estimated by the critic network."
            f"\n| {'step':>8} {'time':>8} | {'avgR':>8} {'stdR':>6} {'avgS':>6} | {'objC':>8} {'objA':>8}"
        )

    def evaluate_and_save(self, actor, horizon_len: int, logging_tuple: tuple):
        self.total_step += horizon_len
        if self.eval_step + self.eval_per_step > self.total_step:
            return
        self.eval_step = self.total_step

        rewards_steps_ary = [
            get_rewards_and_steps(self.env_eval, actor) for _ in range(self.eval_times)
        ]
        rewards_steps_ary = np.array(rewards_steps_ary, dtype=np.float32)
        avg_r = rewards_steps_ary[:, 0].mean()  # average of cumulative rewards
        std_r = rewards_steps_ary[:, 0].std()  # std of cumulative rewards
        avg_s = rewards_steps_ary[:, 1].mean()  # average number of steps in an episode

        used_time = time.time() - self.start_time
        self.recorder.append((self.total_step, used_time, avg_r))

        print(
            f"| {self.total_step:8.2e} {used_time:8.0f} "
            f"| {avg_r:8.2f} {std_r:6.2f} {avg_s:6.0f} "
            f"| {logging_tuple[0]:8.2f} {logging_tuple[1]:8.2f}"
        )


def get_rewards_and_steps(
    env, actor, if_render: bool = False
) -> tuple[float, int]:  # cumulative_rewards and episode_steps
    device = next(actor.parameters()).device  # net.parameters() is a Python generator
    state = env.reset()
    if isinstance(state, tuple):  # new gym API returns (obs, info)
        state = state[0]
    episode_steps = 0
    cumulative_returns = 0.0  # sum of rewards in an episode
    for episode_steps in range(12345):
        tensor_state = torch.as_tensor(
            state, dtype=torch.float32, device=device
        ).unsqueeze(0)
        tensor_action = actor(tensor_state)
        action = (
            tensor_action.detach().cpu().numpy()[0]
        )  # detach() is not strictly needed because gradients are disabled outside
        state, reward, done, _ = env.step(action)
        cumulative_returns += reward

        if if_render:
            env.render()
        if done:
            break
    return cumulative_returns, episode_steps + 1


# -----------------------------------------------------------------------------------------------------------------------------------------
# DRL Agent Class

import torch

# from elegantrl.agents import AgentA2C

MODELS = {"ppo": AgentPPO}
OFF_POLICY_MODELS = ["ddpg", "td3", "sac"]
ON_POLICY_MODELS = ["ppo"]
# MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}
#
# NOISE = {
#     "normal": NormalActionNoise,
#     "ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,
# }


class DRLAgent:
    """Implementations of DRL algorithms

    Attributes
    ----------
    env: gym environment class
        user-defined class

    Methods
    -------
    get_model()
        set up a DRL algorithm
    train_model()
        train a DRL algorithm on a training dataset
        and output the trained model
    DRL_prediction()
        make a prediction on a test dataset and get results
    """

    def __init__(self, env, price_array, tech_array, turbulence_array):
        self.env = env
        self.price_array = price_array
        self.tech_array = tech_array
        self.turbulence_array = turbulence_array

    def get_model(self, model_name, model_kwargs):
        env_config = {
            "price_array": self.price_array,
            "tech_array": self.tech_array,
            "turbulence_array": self.turbulence_array,
            "if_train": True,
        }
        environment = self.env(config=env_config)
        env_args = {
            "config": env_config,
            "env_name": environment.env_name,
            "state_dim": environment.state_dim,
            "action_dim": environment.action_dim,
            "if_discrete": False,
        }
        if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported.")
        agent = MODELS[model_name]
        model = Config(agent_class=agent, env_class=self.env, env_args=env_args)
        model.if_off_policy = model_name in OFF_POLICY_MODELS
        if model_kwargs is not None:
            try:
                model.learning_rate = model_kwargs["learning_rate"]
                model.batch_size = model_kwargs["batch_size"]
                model.gamma = model_kwargs["gamma"]
                model.seed = model_kwargs["seed"]
                model.net_dims = model_kwargs["net_dimension"]
                model.target_step = model_kwargs["target_step"]
                model.eval_gap = model_kwargs["eval_gap"]
                model.eval_times = model_kwargs["eval_times"]
            except BaseException:
                raise ValueError(
                    "Failed to read arguments, please check the 'model_kwargs' input."
                )
        return model

    def train_model(self, model, cwd, total_timesteps=5000):
        model.cwd = cwd
        model.break_step = total_timesteps
        train_agent(model)

    @staticmethod
    def DRL_prediction(model_name, cwd, net_dimension, environment):
        if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported.")
        agent_class = MODELS[model_name]
        environment.env_num = 1
        agent = agent_class(
            net_dimension, environment.state_dim, environment.action_dim
        )
        actor = agent.act
        # load agent
        try:
            cwd = cwd + "/actor.pth"
            print(f"| load actor from: {cwd}")
            actor.load_state_dict(
                torch.load(cwd, map_location=lambda storage, loc: storage)
            )
            act = actor
            device = agent.device
        except BaseException:
            raise ValueError("Failed to load agent!")

        # test on the testing env
        _torch = torch
        state = environment.reset()
        episode_returns = []  # the cumulative_return / initial_account
        episode_total_assets = [environment.initial_total_asset]
        with _torch.no_grad():
            for i in range(environment.max_step):
                s_tensor = _torch.as_tensor((state,), device=device)
                a_tensor = act(s_tensor)  # action_tanh = act.forward()
                action = (
                    a_tensor.detach().cpu().numpy()[0]
                )  # detach() is not strictly needed because torch.no_grad() is active
                state, reward, done, _ = environment.step(action)

                total_asset = (
                    environment.amount
                    + (
                        environment.price_ary[environment.day] * environment.stocks
                    ).sum()
                )
                episode_total_assets.append(total_asset)
                episode_return = total_asset / environment.initial_total_asset
                episode_returns.append(episode_return)
                if done:
                    break
        print("Test Finished!")
        # return episode total_assets on testing data
        print("episode_return", episode_return)
        return episode_total_assets


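# A minimal usage sketch (comment only, not executed here). `StockTradingEnv`,
# the arrays, and the parameter values are placeholders for illustration; the
# erl_params keys mirror what get_model() reads above.
#
# agent = DRLAgent(
#     env=StockTradingEnv,
#     price_array=price_array,
#     tech_array=tech_array,
#     turbulence_array=turbulence_array,
# )
# erl_params = {
#     "learning_rate": 3e-6,
#     "batch_size": 2048,
#     "gamma": 0.985,
#     "seed": 312,
#     "net_dimension": (512, 64),
#     "target_step": 5000,
#     "eval_gap": 30,
#     "eval_times": 1,
# }
# model = agent.get_model("ppo", model_kwargs=erl_params)
# agent.train_model(model=model, cwd="./papertrading_erl", total_timesteps=1e5)

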
# -----------------------------------------------------------------------------------------------------------------------------------------
# Train & Test Functions

from finrl.meta.data_processor import DataProcessor

# construct environment


def train(
    start_date,
    end_date,
    ticker_list,
    data_source,
    time_interval,
    technical_indicator_list,
    drl_lib,
    env,
    model_name,
    if_vix=True,
    **kwargs,
):
    # download data
    dp = DataProcessor(data_source, **kwargs)
    data = dp.download_data(ticker_list, start_date, end_date, time_interval)
    data = dp.clean_data(data)
    data = dp.add_technical_indicator(data, technical_indicator_list)
    if if_vix:
        data = dp.add_vix(data)
    else:
        data = dp.add_turbulence(data)
    price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)
    env_config = {
        "price_array": price_array,
        "tech_array": tech_array,
        "turbulence_array": turbulence_array,
        "if_train": True,
    }
    env_instance = env(config=env_config)

    # read parameters
    cwd = kwargs.get("cwd", "./" + str(model_name))

    if drl_lib == "elegantrl":
        DRLAgent_erl = DRLAgent
        break_step = kwargs.get("break_step", 1e6)
        erl_params = kwargs.get("erl_params")
        agent = DRLAgent_erl(
            env=env,
            price_array=price_array,
            tech_array=tech_array,
            turbulence_array=turbulence_array,
        )
        model = agent.get_model(model_name, model_kwargs=erl_params)
        trained_model = agent.train_model(
            model=model, cwd=cwd, total_timesteps=break_step
        )


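# A minimal usage sketch for train() (comment only, not executed here). The
# tickers, dates, indicator list, API credentials, and environment class are
# placeholders chosen only for illustration.
#
# train(
#     start_date="2022-08-25",
#     end_date="2022-08-31",
#     ticker_list=["AAPL", "MSFT"],
#     data_source="alpaca",
#     time_interval="1Min",
#     technical_indicator_list=["macd", "rsi_30"],
#     drl_lib="elegantrl",
#     env=StockTradingEnv,  # placeholder environment class
#     model_name="ppo",
#     if_vix=True,
#     API_KEY="...",
#     API_SECRET="...",
#     API_BASE_URL="...",
#     erl_params=erl_params,
#     cwd="./papertrading_erl",
#     break_step=1e5,
# )

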
# -----------------------------------------------------------------------------------------------------------------------------------------


def test(
    start_date,
    end_date,
    ticker_list,
    data_source,
    time_interval,
    technical_indicator_list,
    drl_lib,
    env,
    model_name,
    if_vix=True,
    **kwargs,
):
    # import data processor
    from finrl.meta.data_processor import DataProcessor

    # fetch data
    dp = DataProcessor(data_source, **kwargs)
    data = dp.download_data(ticker_list, start_date, end_date, time_interval)
    data = dp.clean_data(data)
    data = dp.add_technical_indicator(data, technical_indicator_list)

    if if_vix:
        data = dp.add_vix(data)
    else:
        data = dp.add_turbulence(data)
    price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)

    env_config = {
        "price_array": price_array,
        "tech_array": tech_array,
        "turbulence_array": turbulence_array,
        "if_train": False,
    }
    env_instance = env(config=env_config)

    # loading an elegantrl agent needs the state dim, action dim, and net dim
    net_dimension = kwargs.get("net_dimension", 2**7)
    cwd = kwargs.get("cwd", "./" + str(model_name))
    print("price_array: ", len(price_array))

    if drl_lib == "elegantrl":
        DRLAgent_erl = DRLAgent
        episode_total_assets = DRLAgent_erl.DRL_prediction(
            model_name=model_name,
            cwd=cwd,
            net_dimension=net_dimension,
            environment=env_instance,
        )
        return episode_total_assets


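# A minimal usage sketch for test() (comment only, not executed here); the
# arguments mirror the train() example above (placeholders), plus the
# net_dimension used during training.
#
# account_value_erl = test(
#     start_date="2022-09-01",
#     end_date="2022-09-02",
#     ticker_list=["AAPL", "MSFT"],
#     data_source="alpaca",
#     time_interval="1Min",
#     technical_indicator_list=["macd", "rsi_30"],
#     drl_lib="elegantrl",
#     env=StockTradingEnv,  # placeholder environment class
#     model_name="ppo",
#     cwd="./papertrading_erl",
#     net_dimension=(512, 64),
# )

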
# -----------------------------------------------------------------------------------------------------------------------------------------

import alpaca_trade_api as tradeapi
import pandas_market_calendars as tc
import numpy as np
import pandas as pd
import yfinance as yf


def get_trading_days(start, end):
    nyse = tc.get_calendar("NYSE")
    df = nyse.date_range_htf("1D", pd.Timestamp(start), pd.Timestamp(end))
    # df = nyse.sessions_in_range(
    #     pd.Timestamp(start, tz=pytz.UTC), pd.Timestamp(end, tz=pytz.UTC)
    # )
    trading_days = []
    for day in df:
        trading_days.append(str(day)[:10])
    return trading_days


def alpaca_history(key, secret, url, start, end):
    api = tradeapi.REST(key, secret, url, "v2")
    trading_days = get_trading_days(start, end)
    df = pd.DataFrame()
    for day in trading_days:
        df = pd.concat(  # DataFrame.append was removed in pandas 2.0; use pd.concat
            [
                df,
                api.get_portfolio_history(date_start=day, timeframe="5Min").df.iloc[
                    :78
                ],  # keep the 78 five-minute bars of a 6.5-hour trading day
            ]
        )
    equities = df.equity.values
    cumu_returns = equities / equities[0]
    cumu_returns = cumu_returns[~np.isnan(cumu_returns)]

    return df, cumu_returns


def DIA_history(start):
    data_df = yf.download(["^DJI"], start=start, interval="5m")
    data_df = data_df.iloc[:]
    baseline_returns = data_df["Adj Close"].values / data_df["Adj Close"].values[0]
    return data_df, baseline_returns


# -----------------------------------------------------------------------------------------------------------------------------------------
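# A minimal usage sketch (comment only, not executed here); the key, secret,
# URL, and dates are placeholders.
#
# df_erl, cumu_erl = alpaca_history(
#     key="...",
#     secret="...",
#     url="https://paper-api.alpaca.markets",
#     start="2022-09-01",
#     end="2022-09-12",
# )
# df_djia, cumu_djia = DIA_history(start="2022-09-01")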