Path: blob/master/finrl/meta/env_stock_trading/env_nas100_wrds.py
732 views
from __future__ import annotations

import os

import numpy as np
from numpy import random as rd

try:
    import gym
except ImportError:  # gym is only referenced here to lower its logger verbosity
    gym = None

if gym is not None:
    # Block warning: 'WARN: Box bound precision lowered by casting to float32'
    gym.logger.set_level(40)


class StockEnvNAS100:
    """Multi-stock trading environment backed by pre-computed NumPy arrays.

    The environment walks through ``price_ary`` one row per step.  Each step
    the agent emits one action per stock in ``[-1, 1]`` (scaled by
    ``max_stock`` into a share count); negative values sell, positive values
    buy.  When the turbulence flag for the current row is set, every holding
    is liquidated instead.

    State vector layout (see :meth:`get_state`)::

        amount, turbulence, turbulence_bool,
        price * stock_dim, stocks * stock_dim, stocks_cd * stock_dim,
        tech indicators
    """

    # Row boundaries used to carve the full dataset into three windows.
    _BEG_ROW = 0
    _MID_ROW = 211210
    _END_ROW = 422420
    _LAST_ROW = 528026
    # Every turbulence value is repeated this many times to align with prices.
    _TURBULENCE_REPEAT = 390

    def __init__(
        self,
        cwd="./data/nas100",
        price_ary=None,
        tech_ary=None,
        turbulence_ary=None,
        gamma=0.999,
        turbulence_thresh=30,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        data_gap=4,
        reward_scaling=2**-11,
        ticker_list=None,
        tech_indicator_list=None,
        initial_stocks=None,
        if_eval=False,
        if_trade=False,
    ):
        """Load (or accept) the data arrays and set up the environment.

        Args:
            cwd: Directory holding ``price_ary.npy``, ``tech_ary.npy`` and
                ``turb_ary.npy``.  Pass ``None`` to use the three ``*_ary``
                arguments instead.
            price_ary: Prices, indexed ``[row, stock]`` (used when ``cwd`` is None).
            tech_ary: Technical indicators, indexed ``[row, feature]``.
            turbulence_ary: One turbulence value per row.
            gamma: Discount applied when accumulating the episode reward.
            turbulence_thresh: Rows whose turbulence exceeds this value
                trigger a full liquidation.
            min_stock_rate: Actions smaller than ``max_stock * min_stock_rate``
                shares (in absolute value) are ignored.
            max_stock: Multiplier turning an action into a share count.
            initial_capital: Nominal starting capital (randomised by +/-5 % on reset).
            buy_cost_pct: Proportional cost charged on buys.
            sell_cost_pct: Proportional cost charged on sells.
            data_gap: Stride used when sub-sampling rows.
            reward_scaling: Factor applied to the per-step asset change.
            ticker_list: Unused; kept for interface compatibility.
            tech_indicator_list: Unused; kept for interface compatibility.
            initial_stocks: Base holdings per stock; zeros when None.
            if_eval: Selects rows ``[0, 211210)`` when True, otherwise
                ``[211210, 422420)``.
                NOTE(review): confirm this is the intended train/eval split direction.
            if_trade: When True, rows ``[422420, 528026)`` are used and
                ``if_eval`` is ignored.

        Raises:
            ValueError: If ``cwd`` is None and any of the three arrays is missing.
        """
        self.min_stock_rate = min_stock_rate

        if if_eval:
            i0, i1 = self._BEG_ROW, self._MID_ROW
        else:
            i0, i1 = self._MID_ROW, self._END_ROW

        # The fallback tuple must be parenthesised: without the parentheses the
        # conditional expression only covers the first element and the result
        # is ``(load_data(cwd), tech_ary, turbulence_ary)``.
        if cwd is not None:
            data_arrays = self.load_data(cwd)
        else:
            data_arrays = (price_ary, tech_ary, turbulence_ary)
            if any(ary is None for ary in data_arrays):
                raise ValueError(
                    "price_ary, tech_ary and turbulence_ary are all required "
                    "when cwd is None"
                )

        if not if_trade:
            data_arrays = [ary[i0:i1:data_gap] for ary in data_arrays]
        else:
            data_arrays = [
                ary[self._END_ROW : self._LAST_ROW : data_gap] for ary in data_arrays
            ]
        self.price_ary, self.tech_ary, turbulence_ary = data_arrays

        self.tech_ary = self.tech_ary * 2**-7
        self.turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        self.turbulence_ary = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2**-5
        ).astype(np.float32)

        stock_dim = self.price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else initial_stocks
        )

        # Populated by reset()
        self.day = None
        self.amount = None
        self.stocks = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None

        # environment information
        self.env_name = "StockEnvNAS"
        # amount + (turbulence, turbulence_bool)
        #   + (price, stock, stock_cd) * stock_dim + tech_dim
        self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_ary.shape[1]
        self.stocks_cd = None
        self.action_dim = stock_dim
        self.max_step = self.price_ary.shape[0] - 1
        self.if_discrete = False
        self.target_return = 2.2
        self.episode_return = 0.0

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        """Start a new episode and return the initial state vector.

        Holdings start at ``initial_stocks`` plus a random 0..63 shares per
        stock, and cash is chosen so that the total asset value equals
        ``initial_capital`` scaled by a random factor in ``[0.95, 1.05)``.

        Args:
            seed: When given, seeds NumPy's global random generator so the
                randomised start is reproducible.
            options: Unused; accepted for interface compatibility.

        Returns:
            The state vector for row 0 (see :meth:`get_state`).
        """
        if seed is not None:
            rd.seed(seed)

        self.day = 0
        price = self.price_ary[self.day]

        self.stocks = (
            self.initial_stocks + rd.randint(0, 64, size=self.initial_stocks.shape)
        ).astype(np.float32)
        self.stocks_cd = np.zeros_like(self.stocks)
        self.amount = (
            self.initial_capital * rd.uniform(0.95, 1.05) - (self.stocks * price).sum()
        )

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0
        return self.get_state(price)  # state

    def step(self, actions):
        """Advance one row, execute the trades and compute the reward.

        Args:
            actions: NumPy array with one entry per stock; multiplied by
                ``max_stock`` and truncated to an integer share count.

        Returns:
            ``(state, reward, done, info)``.  ``reward`` is the scaled change
            in total asset value, except on the final step where the
            discounted cumulative reward is returned instead.  ``info`` is
            always an empty dict.
        """
        actions = (actions * self.max_stock).astype(int)

        self.day += 1
        price = self.price_ary[self.day]
        self.stocks_cd += 1  # steps elapsed since each stock was last traded

        if self.turbulence_bool[self.day] == 0:
            # Actions whose magnitude does not exceed this many shares are ignored.
            min_action = int(self.max_stock * self.min_stock_rate)
            for index in np.where(actions < -min_action)[0]:  # sell_index
                if price[index] > 0:  # Sell only if the price is > 0
                    sell_num_shares = min(self.stocks[index], -actions[index])
                    self.stocks[index] -= sell_num_shares
                    self.amount += (
                        price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                    )
                    self.stocks_cd[index] = 0
            for index in np.where(actions > min_action)[0]:  # buy_index
                if (
                    price[index] > 0
                ):  # Buy only if the price is > 0 (no missing data in this particular date)
                    # NOTE(review): affordability ignores buy_cost_pct, so cash
                    # can dip slightly below zero after the purchase.
                    buy_num_shares = min(self.amount // price[index], actions[index])
                    self.stocks[index] += buy_num_shares
                    self.amount -= (
                        price[index] * buy_num_shares * (1 + self.buy_cost_pct)
                    )
                    self.stocks_cd[index] = 0

        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cd[:] = 0

        state = self.get_state(price)
        total_asset = self.amount + (self.stocks * price).sum()
        reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset

        self.gamma_reward = self.gamma_reward * self.gamma + reward
        done = self.day == self.max_step
        if done:
            reward = self.gamma_reward
            self.episode_return = total_asset / self.initial_total_asset

        return state, reward, done, dict()

    def get_state(self, price):
        """Assemble the flat state vector for the current row.

        Args:
            price: Price row for ``self.day``.

        Returns:
            1-D array: scaled cash (floored at 1e4 before scaling), turbulence
            value, turbulence flag, scaled prices, scaled holdings, per-stock
            steps-since-trade counters and the technical indicators.
        """
        amount = np.array(max(self.amount, 1e4) * (2**-12), dtype=np.float32)
        scale = np.array(2**-6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                self.turbulence_ary[self.day],
                self.turbulence_bool[self.day],
                price * scale,
                self.stocks * scale,
                self.stocks_cd,
                self.tech_ary[self.day],
            )
        )  # state.astype(np.float32)

    def load_data(self, cwd):
        """Load the price, indicator and turbulence arrays from ``cwd``.

        Args:
            cwd: Directory containing ``price_ary.npy``, ``tech_ary.npy`` and
                ``turb_ary.npy``.

        Returns:
            ``(price_ary, tech_ary, turbulence_ary)``; the first two are cast
            to float32, the third is expanded to one value per price row.

        Raises:
            FileNotFoundError: If any of the three files is missing.
        """
        data_path_price_array = f"{cwd}/price_ary.npy"
        data_path_tech_array = f"{cwd}/tech_ary.npy"
        data_path_turb_array = f"{cwd}/turb_ary.npy"

        # Original author's note on the shipped data:
        # turbulence_ary.shape = (1358, ). std, min, max = 3, 0, 65.2
        turbulence_ary = np.load(data_path_turb_array)
        # Repeat each value 390 times: 1358 * 390 = 529620 rows.
        turbulence_ary = turbulence_ary.repeat(self._TURBULENCE_REPEAT)
        # Keep the last 528026 rows (drops the first 529620 - 528026 = 1594).
        turbulence_ary = turbulence_ary[-self._LAST_ROW :]

        if not os.path.exists(data_path_price_array):
            # Fail loudly instead of hitting an UnboundLocalError at `return`.
            raise FileNotFoundError(f"price array not found: {data_path_price_array}")

        price_ary = np.load(data_path_price_array).astype(np.float32)
        tech_ary = np.load(data_path_tech_array).astype(np.float32)

        return price_ary, tech_ary, turbulence_ary

    def draw_cumulative_return(self, args, _torch) -> list:
        """Roll out a saved agent for one episode and plot its return curve.

        Args:
            args: Object exposing ``agent``, ``net_dim`` and ``cwd``.  The
                agent must provide ``init``, ``save_load_model``, ``act`` and
                ``device``.
            _torch: The ``torch`` module (passed in rather than imported here).

        Returns:
            List of ``total_asset / initial_total_asset`` after every step.
            The plot is saved to ``{cwd}/cumulative_return.jpg``.
        """
        state_dim = self.state_dim
        action_dim = self.action_dim

        agent = args.agent
        net_dim = args.net_dim
        cwd = args.cwd

        agent.init(net_dim, state_dim, action_dim)
        agent.save_load_model(cwd=cwd, if_save=False)
        act = agent.act
        device = agent.device

        state = self.reset()
        episode_returns = list()  # the cumulative_return / initial_account
        with _torch.no_grad():
            for _ in range(self.max_step):
                s_tensor = _torch.as_tensor((state,), device=device)
                a_tensor = act(s_tensor)  # action_tanh = act.forward()
                action = (
                    a_tensor.detach().cpu().numpy()[0]
                )  # not need detach(), because with torch.no_grad() outside
                state, _reward, done, _ = self.step(action)

                total_asset = (
                    self.amount + (self.price_ary[self.day] * self.stocks).sum()
                )
                episode_return = total_asset / self.initial_total_asset
                episode_returns.append(episode_return)
                if done:
                    break

        import matplotlib.pyplot as plt

        plt.plot(episode_returns)
        plt.grid()
        plt.title("cumulative return")
        plt.xlabel("day")
        # Second label belongs on the y-axis; it used to overwrite the x label.
        plt.ylabel("multiple of initial_account")
        plt.savefig(f"{cwd}/cumulative_return.jpg")
        print(f"| draw_cumulative_return: save in {cwd}/cumulative_return.jpg")
        return episode_returns

    @staticmethod
    def sigmoid_sign(ary, thresh):
        """Squash ``ary`` with a zero-centred sigmoid scaled by ``thresh``.

        Computes ``(1 / (1 + exp(-e * ary / thresh)) - 0.5) * thresh``, so the
        output is 0 at 0 and bounded by ``+/- thresh / 2``.
        """

        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh