Path: blob/master/finrl/meta/paper_trading/common.py
732 views
# Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing.1# -----------------------------------------------------------------------------------------------------------------------------------------2# Import related modules3from __future__ import annotations45import os6import time78import gym9import torch.nn as nn10from torch import Tensor11from torch.distributions.normal import Normal121314# -----------------------------------------------------------------------------------------------------------------------------------------15# PPO161718class ActorPPO(nn.Module):19def __init__(self, dims: [int], state_dim: int, action_dim: int):20super().__init__()21self.net = build_mlp(dims=[state_dim, *dims, action_dim])22self.action_std_log = nn.Parameter(23torch.zeros((1, action_dim)), requires_grad=True24) # trainable parameter2526def forward(self, state: Tensor) -> Tensor:27return self.net(state).tanh() # action.tanh()2829def get_action(self, state: Tensor) -> (Tensor, Tensor): # for exploration30action_avg = self.net(state)31action_std = self.action_std_log.exp()3233dist = Normal(action_avg, action_std)34action = dist.sample()35logprob = dist.log_prob(action).sum(1)36return action, logprob3738def get_logprob_entropy(self, state: Tensor, action: Tensor) -> (Tensor, Tensor):39action_avg = self.net(state)40action_std = self.action_std_log.exp()4142dist = Normal(action_avg, action_std)43logprob = dist.log_prob(action).sum(1)44entropy = dist.entropy().sum(1)45return logprob, entropy4647@staticmethod48def convert_action_for_env(action: Tensor) -> Tensor:49return action.tanh()505152class CriticPPO(nn.Module):53def __init__(self, dims: [int], state_dim: int, _action_dim: int):54super().__init__()55self.net = build_mlp(dims=[state_dim, *dims, 1])5657def forward(self, state: Tensor) -> Tensor:58return self.net(state) # advantage value596061def build_mlp(dims: [int]) -> nn.Sequential: # MLP (MultiLayer Perceptron)62net_list = []63for i in range(len(dims) - 1):64net_list.extend([nn.Linear(dims[i], dims[i + 1]), nn.ReLU()])65del net_list[-1] # remove the activation of output layer66return nn.Sequential(*net_list)676869class Config:70def __init__(self, agent_class=None, env_class=None, env_args=None):71self.env_class = env_class # env = env_class(**env_args)72self.env_args = env_args # env = env_class(**env_args)7374if env_args is None: # dummy env_args75env_args = {76"env_name": None,77"state_dim": None,78"action_dim": None,79"if_discrete": None,80}81self.env_name = env_args[82"env_name"83] # the name of environment. Be used to set 'cwd'.84self.state_dim = env_args[85"state_dim"86] # vector dimension (feature number) of state87self.action_dim = env_args[88"action_dim"89] # vector dimension (feature number) of action90self.if_discrete = env_args[91"if_discrete"92] # discrete or continuous action space9394self.agent_class = agent_class # agent = agent_class(...)9596"""Arguments for reward shaping"""97self.gamma = 0.99 # discount factor of future rewards98self.reward_scale = 1.0 # an approximate target reward usually be closed to 25699100"""Arguments for training"""101self.gpu_id = int(0) # `int` means the ID of single GPU, -1 means CPU102self.net_dims = (10364,10432,105) # the middle layer dimension of MLP (MultiLayer Perceptron)106self.learning_rate = 6e-5 # 2 ** -14 ~= 6e-5107self.soft_update_tau = 5e-3 # 2 ** -8 ~= 5e-3108self.batch_size = int(128) # num of transitions sampled from replay buffer.109self.horizon_len = int(1102000111) # collect horizon_len step while exploring, then update network112self.buffer_size = (113None # ReplayBuffer size. Empty the ReplayBuffer for on-policy.114)115self.repeat_times = 8.0 # repeatedly update network using ReplayBuffer to keep critic's loss small116117"""Arguments for evaluate"""118self.cwd = None # current working directory to save model. None means set automatically119self.break_step = +np.inf # break training if 'total_step > break_step'120self.eval_times = int(32) # number of times that get episodic cumulative return121self.eval_per_step = int(2e4) # evaluate the agent per training steps122123def init_before_training(self):124if self.cwd is None: # set cwd (current working directory) for saving model125self.cwd = f"./{self.env_name}_{self.agent_class.__name__[5:]}"126os.makedirs(self.cwd, exist_ok=True)127128129def get_gym_env_args(env, if_print: bool) -> dict:130if {"unwrapped", "observation_space", "action_space", "spec"}.issubset(131dir(env)132): # isinstance(env, gym.Env):133env_name = env.unwrapped.spec.id134state_shape = env.observation_space.shape135state_dim = (136state_shape[0] if len(state_shape) == 1 else state_shape137) # sometimes state_dim is a list138139if_discrete = isinstance(env.action_space, gym.spaces.Discrete)140if if_discrete: # make sure it is discrete action space141action_dim = env.action_space.n142elif isinstance(143env.action_space, gym.spaces.Box144): # make sure it is continuous action space145action_dim = env.action_space.shape[0]146147env_args = {148"env_name": env_name,149"state_dim": state_dim,150"action_dim": action_dim,151"if_discrete": if_discrete,152}153print(f"env_args = {repr(env_args)}") if if_print else None154return env_args155156157def kwargs_filter(function, kwargs: dict) -> dict:158import inspect159160sign = inspect.signature(function).parameters.values()161sign = {val.name for val in sign}162common_args = sign.intersection(kwargs.keys())163return {key: kwargs[key] for key in common_args} # filtered kwargs164165166def build_env(env_class=None, env_args=None):167if env_class.__module__ == "gym.envs.registration": # special rule168env = env_class(id=env_args["env_name"])169else:170env = env_class(**kwargs_filter(env_class.__init__, env_args.copy()))171for attr_str in ("env_name", "state_dim", "action_dim", "if_discrete"):172setattr(env, attr_str, env_args[attr_str])173return env174175176class AgentBase:177def __init__(178self,179net_dims: [int],180state_dim: int,181action_dim: int,182gpu_id: int = 0,183args: Config = Config(),184):185self.state_dim = state_dim186self.action_dim = action_dim187188self.gamma = args.gamma189self.batch_size = args.batch_size190self.repeat_times = args.repeat_times191self.reward_scale = args.reward_scale192self.soft_update_tau = args.soft_update_tau193194self.states = None # assert self.states == (1, state_dim)195self.device = torch.device(196f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu"197)198199act_class = getattr(self, "act_class", None)200cri_class = getattr(self, "cri_class", None)201self.act = self.act_target = act_class(net_dims, state_dim, action_dim).to(202self.device203)204self.cri = self.cri_target = (205cri_class(net_dims, state_dim, action_dim).to(self.device)206if cri_class207else self.act208)209210self.act_optimizer = torch.optim.Adam(self.act.parameters(), args.learning_rate)211self.cri_optimizer = (212torch.optim.Adam(self.cri.parameters(), args.learning_rate)213if cri_class214else self.act_optimizer215)216217self.criterion = torch.nn.SmoothL1Loss()218219@staticmethod220def optimizer_update(optimizer, objective: Tensor):221optimizer.zero_grad()222objective.backward()223optimizer.step()224225@staticmethod226def soft_update(227target_net: torch.nn.Module, current_net: torch.nn.Module, tau: float228):229for tar, cur in zip(target_net.parameters(), current_net.parameters()):230tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))231232233class AgentPPO(AgentBase):234def __init__(235self,236net_dims: [int],237state_dim: int,238action_dim: int,239gpu_id: int = 0,240args: Config = Config(),241):242self.if_off_policy = False243self.act_class = getattr(self, "act_class", ActorPPO)244self.cri_class = getattr(self, "cri_class", CriticPPO)245AgentBase.__init__(self, net_dims, state_dim, action_dim, gpu_id, args)246247self.ratio_clip = getattr(248args, "ratio_clip", 0.25249) # `ratio.clamp(1 - clip, 1 + clip)`250self.lambda_gae_adv = getattr(251args, "lambda_gae_adv", 0.95252) # could be 0.80~0.99253self.lambda_entropy = getattr(254args, "lambda_entropy", 0.01255) # could be 0.00~0.10256self.lambda_entropy = torch.tensor(257self.lambda_entropy, dtype=torch.float32, device=self.device258)259260def explore_env(self, env, horizon_len: int) -> [Tensor]:261states = torch.zeros((horizon_len, self.state_dim), dtype=torch.float32).to(262self.device263)264actions = torch.zeros((horizon_len, self.action_dim), dtype=torch.float32).to(265self.device266)267logprobs = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)268rewards = torch.zeros(horizon_len, dtype=torch.float32).to(self.device)269dones = torch.zeros(horizon_len, dtype=torch.bool).to(self.device)270271ary_state = self.states[0]272273get_action = self.act.get_action274convert = self.act.convert_action_for_env275for i in range(horizon_len):276state = torch.as_tensor(ary_state, dtype=torch.float32, device=self.device)277action, logprob = (t.squeeze(0) for t in get_action(state.unsqueeze(0))[:2])278279ary_action = convert(action).detach().cpu().numpy()280ary_state, reward, done, _ = env.step(ary_action)281if done:282obs = env.reset()283if isinstance(obs, tuple):284obs = obs[0]285ary_state = obs286287states[i] = state288actions[i] = action289logprobs[i] = logprob290rewards[i] = reward291dones[i] = done292293self.states[0] = ary_state294rewards = (rewards * self.reward_scale).unsqueeze(1)295undones = (1 - dones.type(torch.float32)).unsqueeze(1)296return states, actions, logprobs, rewards, undones297298def update_net(self, buffer) -> [float]:299with torch.no_grad():300states, actions, logprobs, rewards, undones = buffer301buffer_size = states.shape[0]302303"""get advantages reward_sums"""304bs = 2**10 # set a smaller 'batch_size' when out of GPU memory.305values = [self.cri(states[i : i + bs]) for i in range(0, buffer_size, bs)]306values = torch.cat(values, dim=0).squeeze(3071308) # values.shape == (buffer_size, )309310advantages = self.get_advantages(311rewards, undones, values312) # advantages.shape == (buffer_size, )313reward_sums = advantages + values # reward_sums.shape == (buffer_size, )314del rewards, undones, values315316advantages = (advantages - advantages.mean()) / (317advantages.std(dim=0) + 1e-5318)319assert logprobs.shape == advantages.shape == reward_sums.shape == (buffer_size,)320321"""update network"""322obj_critics = 0.0323obj_actors = 0.0324325update_times = int(buffer_size * self.repeat_times / self.batch_size)326assert update_times >= 1327for _ in range(update_times):328indices = torch.randint(329buffer_size, size=(self.batch_size,), requires_grad=False330)331state = states[indices]332action = actions[indices]333logprob = logprobs[indices]334advantage = advantages[indices]335reward_sum = reward_sums[indices]336337value = self.cri(state).squeeze(3381339) # critic network predicts the reward_sum (Q value) of state340obj_critic = self.criterion(value, reward_sum)341self.optimizer_update(self.cri_optimizer, obj_critic)342343new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)344ratio = (new_logprob - logprob.detach()).exp()345surrogate1 = advantage * ratio346surrogate2 = advantage * ratio.clamp(3471 - self.ratio_clip, 1 + self.ratio_clip348)349obj_surrogate = torch.min(surrogate1, surrogate2).mean()350351obj_actor = obj_surrogate + obj_entropy.mean() * self.lambda_entropy352self.optimizer_update(self.act_optimizer, -obj_actor)353354obj_critics += obj_critic.item()355obj_actors += obj_actor.item()356a_std_log = getattr(self.act, "a_std_log", torch.zeros(1)).mean()357return obj_critics / update_times, obj_actors / update_times, a_std_log.item()358359def get_advantages(360self, rewards: Tensor, undones: Tensor, values: Tensor361) -> Tensor:362advantages = torch.empty_like(values) # advantage value363364masks = undones * self.gamma365horizon_len = rewards.shape[0]366367next_state = torch.tensor(self.states, dtype=torch.float32).to(self.device)368next_value = self.cri(next_state).detach()[0, 0]369370advantage = 0 # last_gae_lambda371for t in range(horizon_len - 1, -1, -1):372delta = rewards[t] + masks[t] * next_value - values[t]373advantages[t] = advantage = (374delta + masks[t] * self.lambda_gae_adv * advantage375)376next_value = values[t]377return advantages378379380class PendulumEnv(gym.Wrapper): # a demo of custom gym env381def __init__(self):382gym.logger.set_level(40) # Block warning383gym_env_name = "Pendulum-v0" if gym.__version__ < "0.18.0" else "Pendulum-v1"384super().__init__(env=gym.make(gym_env_name))385386"""the necessary env information when you design a custom env"""387self.env_name = gym_env_name # the name of this env.388self.state_dim = self.observation_space.shape[0] # feature number of state389self.action_dim = self.action_space.shape[0] # feature number of action390self.if_discrete = False # discrete action or continuous action391392def reset(393self,394*,395seed=None,396options=None,397) -> np.ndarray: # reset the agent in env398obs = self.env.reset()399if isinstance(obs, tuple):400obs = obs[0]401return obs402403def step(404self, action: np.ndarray405) -> (np.ndarray, float, bool, dict): # agent interacts in env406# We suggest that adjust action space to (-1, +1) when designing a custom env.407state, reward, done, info_dict = self.env.step(action * 2)408return state.reshape(self.state_dim), float(reward), done, info_dict409410411def train_agent(args: Config):412args.init_before_training()413414env = build_env(args.env_class, args.env_args)415agent = args.agent_class(416args.net_dims, args.state_dim, args.action_dim, gpu_id=args.gpu_id, args=args417)418obs = env.reset()419if isinstance(obs, tuple):420obs = obs[0]421agent.states = obs[np.newaxis, :]422423evaluator = Evaluator(424eval_env=build_env(args.env_class, args.env_args),425eval_per_step=args.eval_per_step,426eval_times=args.eval_times,427cwd=args.cwd,428)429torch.set_grad_enabled(False)430while True: # start training431buffer_items = agent.explore_env(env, args.horizon_len)432433torch.set_grad_enabled(True)434logging_tuple = agent.update_net(buffer_items)435torch.set_grad_enabled(False)436437evaluator.evaluate_and_save(agent.act, args.horizon_len, logging_tuple)438if (evaluator.total_step > args.break_step) or os.path.exists(439f"{args.cwd}/stop"440):441torch.save(agent.act.state_dict(), args.cwd + "/actor.pth")442break # stop training when reach `break_step` or `mkdir cwd/stop`443444445def render_agent(446env_class,447env_args: dict,448net_dims: [int],449agent_class,450actor_path: str,451render_times: int = 8,452):453env = build_env(env_class, env_args)454455state_dim = env_args["state_dim"]456action_dim = env_args["action_dim"]457agent = agent_class(net_dims, state_dim, action_dim, gpu_id=-1)458actor = agent.act459460print(f"| render and load actor from: {actor_path}")461actor.load_state_dict(462torch.load(actor_path, map_location=lambda storage, loc: storage)463)464for i in range(render_times):465cumulative_reward, episode_step = get_rewards_and_steps(466env, actor, if_render=True467)468print(469f"|{i:4} cumulative_reward {cumulative_reward:9.3f} episode_step {episode_step:5.0f}"470)471472473class Evaluator:474def __init__(475self, eval_env, eval_per_step: int = 1e4, eval_times: int = 8, cwd: str = "."476):477self.cwd = cwd478self.env_eval = eval_env479self.eval_step = 0480self.total_step = 0481self.start_time = time.time()482self.eval_times = (483eval_times # number of times that get episodic cumulative return484)485self.eval_per_step = eval_per_step # evaluate the agent per training steps486487self.recorder = []488print(489f"\n| `step`: Number of samples, or total training steps, or running times of `env.step()`."490f"\n| `time`: Time spent from the start of training to this moment."491f"\n| `avgR`: Average value of cumulative rewards, which is the sum of rewards in an episode."492f"\n| `stdR`: Standard dev of cumulative rewards, which is the sum of rewards in an episode."493f"\n| `avgS`: Average of steps in an episode."494f"\n| `objC`: Objective of Critic network. Or call it loss function of critic network."495f"\n| `objA`: Objective of Actor network. It is the average Q value of the critic network."496f"\n| {'step':>8} {'time':>8} | {'avgR':>8} {'stdR':>6} {'avgS':>6} | {'objC':>8} {'objA':>8}"497)498499def evaluate_and_save(self, actor, horizon_len: int, logging_tuple: tuple):500self.total_step += horizon_len501if self.eval_step + self.eval_per_step > self.total_step:502return503self.eval_step = self.total_step504505rewards_steps_ary = [506get_rewards_and_steps(self.env_eval, actor) for _ in range(self.eval_times)507]508rewards_steps_ary = np.array(rewards_steps_ary, dtype=np.float32)509avg_r = rewards_steps_ary[:, 0].mean() # average of cumulative rewards510std_r = rewards_steps_ary[:, 0].std() # std of cumulative rewards511avg_s = rewards_steps_ary[:, 1].mean() # average of steps in an episode512513used_time = time.time() - self.start_time514self.recorder.append((self.total_step, used_time, avg_r))515516print(517f"| {self.total_step:8.2e} {used_time:8.0f} "518f"| {avg_r:8.2f} {std_r:6.2f} {avg_s:6.0f} "519f"| {logging_tuple[0]:8.2f} {logging_tuple[1]:8.2f}"520)521522523def get_rewards_and_steps(524env, actor, if_render: bool = False525) -> (float, int): # cumulative_rewards and episode_steps526device = next(actor.parameters()).device # net.parameters() is a Python generator.527state = env.reset()528if isinstance(state, tuple):529state = state[0]530episode_steps = 0531cumulative_returns = 0.0 # sum of rewards in an episode532for episode_steps in range(12345):533tensor_state = torch.as_tensor(534state, dtype=torch.float32, device=device535).unsqueeze(0)536tensor_action = actor(tensor_state)537action = (538tensor_action.detach().cpu().numpy()[0]539) # not need detach(), because using torch.no_grad() outside540state, reward, done, _ = env.step(action)541cumulative_returns += reward542543if if_render:544env.render()545if done:546break547return cumulative_returns, episode_steps + 1548549550# -----------------------------------------------------------------------------------------------------------------------------------------551# DRL Agent Class552553import torch554555# from elegantrl.agents import AgentA2C556557MODELS = {"ppo": AgentPPO}558OFF_POLICY_MODELS = ["ddpg", "td3", "sac"]559ON_POLICY_MODELS = ["ppo"]560# MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}561#562# NOISE = {563# "normal": NormalActionNoise,564# "ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,565# }566567568class DRLAgent:569"""Implementations of DRL algorithms570Attributes571----------572env: gym environment class573user-defined class574Methods575-------576get_model()577setup DRL algorithms578train_model()579train DRL algorithms in a train dataset580and output the trained model581DRL_prediction()582make a prediction in a test dataset and get results583"""584585def __init__(self, env, price_array, tech_array, turbulence_array):586self.env = env587self.price_array = price_array588self.tech_array = tech_array589self.turbulence_array = turbulence_array590591def get_model(self, model_name, model_kwargs):592env_config = {593"price_array": self.price_array,594"tech_array": self.tech_array,595"turbulence_array": self.turbulence_array,596"if_train": True,597}598environment = self.env(config=env_config)599env_args = {600"config": env_config,601"env_name": environment.env_name,602"state_dim": environment.state_dim,603"action_dim": environment.action_dim,604"if_discrete": False,605}606agent = MODELS[model_name]607if model_name not in MODELS:608raise NotImplementedError("NotImplementedError")609model = Config(agent_class=agent, env_class=self.env, env_args=env_args)610model.if_off_policy = model_name in OFF_POLICY_MODELS611if model_kwargs is not None:612try:613model.learning_rate = model_kwargs["learning_rate"]614model.batch_size = model_kwargs["batch_size"]615model.gamma = model_kwargs["gamma"]616model.seed = model_kwargs["seed"]617model.net_dims = model_kwargs["net_dimension"]618model.target_step = model_kwargs["target_step"]619model.eval_gap = model_kwargs["eval_gap"]620model.eval_times = model_kwargs["eval_times"]621except BaseException:622raise ValueError(623"Fail to read arguments, please check 'model_kwargs' input."624)625return model626627def train_model(self, model, cwd, total_timesteps=5000):628model.cwd = cwd629model.break_step = total_timesteps630train_agent(model)631632@staticmethod633def DRL_prediction(model_name, cwd, net_dimension, environment):634if model_name not in MODELS:635raise NotImplementedError("NotImplementedError")636agent_class = MODELS[model_name]637environment.env_num = 1638agent = agent_class(639net_dimension, environment.state_dim, environment.action_dim640)641actor = agent.act642# load agent643try:644cwd = cwd + "/actor.pth"645print(f"| load actor from: {cwd}")646actor.load_state_dict(647torch.load(cwd, map_location=lambda storage, loc: storage)648)649act = actor650device = agent.device651except BaseException:652raise ValueError("Fail to load agent!")653654# test on the testing env655_torch = torch656state = environment.reset()657episode_returns = [] # the cumulative_return / initial_account658episode_total_assets = [environment.initial_total_asset]659with _torch.no_grad():660for i in range(environment.max_step):661s_tensor = _torch.as_tensor((state,), device=device)662a_tensor = act(s_tensor) # action_tanh = act.forward()663action = (664a_tensor.detach().cpu().numpy()[0]665) # not need detach(), because with torch.no_grad() outside666state, reward, done, _ = environment.step(action)667668total_asset = (669environment.amount670+ (671environment.price_ary[environment.day] * environment.stocks672).sum()673)674episode_total_assets.append(total_asset)675episode_return = total_asset / environment.initial_total_asset676episode_returns.append(episode_return)677if done:678break679print("Test Finished!")680# return episode total_assets on testing data681print("episode_return", episode_return)682return episode_total_assets683684685# -----------------------------------------------------------------------------------------------------------------------------------------686# Train & Test Functions687688from finrl.meta.data_processor import DataProcessor689690# construct environment691692693def train(694start_date,695end_date,696ticker_list,697data_source,698time_interval,699technical_indicator_list,700drl_lib,701env,702model_name,703if_vix=True,704**kwargs,705):706# download data707dp = DataProcessor(data_source, **kwargs)708data = dp.download_data(ticker_list, start_date, end_date, time_interval)709data = dp.clean_data(data)710data = dp.add_technical_indicator(data, technical_indicator_list)711if if_vix:712data = dp.add_vix(data)713else:714data = dp.add_turbulence(data)715price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)716env_config = {717"price_array": price_array,718"tech_array": tech_array,719"turbulence_array": turbulence_array,720"if_train": True,721}722env_instance = env(config=env_config)723724# read parameters725cwd = kwargs.get("cwd", "./" + str(model_name))726727if drl_lib == "elegantrl":728DRLAgent_erl = DRLAgent729break_step = kwargs.get("break_step", 1e6)730erl_params = kwargs.get("erl_params")731agent = DRLAgent_erl(732env=env,733price_array=price_array,734tech_array=tech_array,735turbulence_array=turbulence_array,736)737model = agent.get_model(model_name, model_kwargs=erl_params)738trained_model = agent.train_model(739model=model, cwd=cwd, total_timesteps=break_step740)741742743# -----------------------------------------------------------------------------------------------------------------------------------------744745746def test(747start_date,748end_date,749ticker_list,750data_source,751time_interval,752technical_indicator_list,753drl_lib,754env,755model_name,756if_vix=True,757**kwargs,758):759# import data processor760from finrl.meta.data_processor import DataProcessor761762# fetch data763dp = DataProcessor(data_source, **kwargs)764data = dp.download_data(ticker_list, start_date, end_date, time_interval)765data = dp.clean_data(data)766data = dp.add_technical_indicator(data, technical_indicator_list)767768if if_vix:769data = dp.add_vix(data)770else:771data = dp.add_turbulence(data)772price_array, tech_array, turbulence_array = dp.df_to_array(data, if_vix)773774env_config = {775"price_array": price_array,776"tech_array": tech_array,777"turbulence_array": turbulence_array,778"if_train": False,779}780env_instance = env(config=env_config)781782# load elegantrl needs state dim, action dim and net dim783net_dimension = kwargs.get("net_dimension", 2**7)784cwd = kwargs.get("cwd", "./" + str(model_name))785print("price_array: ", len(price_array))786787if drl_lib == "elegantrl":788DRLAgent_erl = DRLAgent789episode_total_assets = DRLAgent_erl.DRL_prediction(790model_name=model_name,791cwd=cwd,792net_dimension=net_dimension,793environment=env_instance,794)795return episode_total_assets796797798# -----------------------------------------------------------------------------------------------------------------------------------------799800import alpaca_trade_api as tradeapi801import pandas_market_calendars as tc802import numpy as np803import pandas as pd804import yfinance as yf805806807def get_trading_days(start, end):808nyse = tc.get_calendar("NYSE")809df = nyse.date_range_htf("1D", pd.Timestamp(start), pd.Timestamp(end))810# df = nyse.sessions_in_range(811# pd.Timestamp(start, tz=pytz.UTC), pd.Timestamp(end, tz=pytz.UTC)812# )813trading_days = []814for day in df:815trading_days.append(str(day)[:10])816return trading_days817818819def alpaca_history(key, secret, url, start, end):820api = tradeapi.REST(key, secret, url, "v2")821trading_days = get_trading_days(start, end)822df = pd.DataFrame()823for day in trading_days:824df = df.append(825api.get_portfolio_history(date_start=day, timeframe="5Min").df.iloc[:78]826)827equities = df.equity.values828cumu_returns = equities / equities[0]829cumu_returns = cumu_returns[~np.isnan(cumu_returns)]830831return df, cumu_returns832833834def DIA_history(start):835data_df = yf.download(["^DJI"], start=start, interval="5m")836data_df = data_df.iloc[:]837baseline_returns = data_df["Adj Close"].values / data_df["Adj Close"].values[0]838return data_df, baseline_returns839840841# -----------------------------------------------------------------------------------------------------------------------------------------842843844