Path: blob/master/finrl/meta/env_stock_trading/env_stocktrading_np.py
732 views
from __future__ import annotations

import gymnasium as gym
import numpy as np
from numpy import random as rd


class StockTradingEnv(gym.Env):
    """Multi-stock trading environment driven by pre-computed NumPy arrays.

    One episode walks through the rows of ``price_array`` one row per step.
    The action is a vector in ``[-1, 1]`` with one entry per stock; it is
    multiplied by ``max_stock`` and truncated to an integer number of shares
    to sell (negative) or buy (positive).

    The observation is a flat ``float32`` vector laid out as::

        [scaled cash,
         scaled turbulence, turbulence flag,
         scaled prices      (stock_dim),
         scaled holdings    (stock_dim),
         cool-down counters (stock_dim),
         scaled technical indicators (tech_dim)]
    """

    def __init__(
        self,
        config,
        initial_account=1e6,
        gamma=0.99,
        turbulence_thresh=99,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        reward_scaling=2**-11,
        initial_stocks=None,
    ):
        """Build the environment from a config dict of arrays.

        Args:
            config: mapping with keys ``"price_array"``, ``"tech_array"``,
                ``"turbulence_array"`` (NumPy arrays indexed by day on the
                first axis) and ``"if_train"`` (bool).
            initial_account: not read by this class; kept so existing callers
                that pass it keep working.
            gamma: discount factor used to accumulate ``gamma_reward``.
            turbulence_thresh: turbulence above this value forces liquidation.
            min_stock_rate: fraction of ``max_stock`` an action must exceed
                (in absolute value) before it triggers a trade.
            max_stock: number of shares an action of magnitude 1 maps to.
            initial_capital: starting cash.
            buy_cost_pct: proportional transaction cost on buys.
            sell_cost_pct: proportional transaction cost on sells.
            reward_scaling: multiplier applied to the per-step asset change.
            initial_stocks: starting holdings per stock; zeros when ``None``.
        """
        price_ary = config["price_array"]
        tech_ary = config["tech_array"]
        turbulence_ary = config["turbulence_array"]
        if_train = config["if_train"]

        self.price_ary = price_ary.astype(np.float32)
        # Technical indicators are down-scaled by 2**-7 for the observation.
        self.tech_ary = tech_ary.astype(np.float32) * 2**-7
        # 1.0 on days where turbulence exceeds the threshold, else 0.0.
        self.turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        # Squashed and down-scaled turbulence used as an observation feature.
        self.turbulence_ary = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2**-5
        ).astype(np.float32)

        stock_dim = self.price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.min_stock_rate = min_stock_rate
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        # np.asarray lets callers pass a plain list; reset() needs .shape/.astype.
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else np.asarray(initial_stocks)
        )

        # Episode state, populated by reset().
        self.day = None
        self.amount = None
        self.stocks = None
        self.stocks_cool_down = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None
        # True once reset() has been given an explicit seed.
        self._seeded_reset = False

        # Environment information.
        self.env_name = "StockEnv"
        # amount + (turbulence, turbulence_bool)
        # + (price, stock, cool_down) * stock_dim + tech_dim
        self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_ary.shape[1]
        self.stocks_cd = None  # legacy attribute; the live counter is stocks_cool_down
        self.action_dim = stock_dim
        self.max_step = self.price_ary.shape[0] - 1
        self.if_train = if_train
        self.if_discrete = False
        self.target_return = 10.0
        self.episode_return = 0.0

        self.observation_space = gym.spaces.Box(
            low=-3000, high=3000, shape=(self.state_dim,), dtype=np.float32
        )
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.action_dim,), dtype=np.float32
        )

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        """Start a new episode at day 0.

        In training mode the starting holdings and cash are randomised: each
        stock gets 0..63 extra shares and the capital is jittered by +/-5%,
        with the value of the starting shares deducted from cash.

        Args:
            seed: optional seed. When given, the environment's own generator
                is seeded and used for this and all later resets. When no
                seed has ever been given, the global NumPy RNG is used, as
                before.
            options: accepted for API compatibility; not used.

        Returns:
            ``(state, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        if seed is not None:
            self._seeded_reset = True

        self.day = 0
        price = self.price_ary[self.day]

        if self.if_train:
            shape = self.initial_stocks.shape
            # Draw order (shares first, then capital jitter) matches the
            # historical behaviour so np.random.seed-based runs are unchanged.
            if self._seeded_reset:
                extra_shares = self.np_random.integers(0, 64, size=shape)
                capital_jitter = self.np_random.uniform(0.95, 1.05)
            else:
                extra_shares = rd.randint(0, 64, size=shape)
                capital_jitter = rd.uniform(0.95, 1.05)
            self.stocks = (self.initial_stocks + extra_shares).astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = (
                self.initial_capital * capital_jitter - (self.stocks * price).sum()
            )
        else:
            self.stocks = self.initial_stocks.astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = self.initial_capital

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0
        return self.get_state(price), {}  # state, info

    def step(self, actions):
        """Advance one day and apply the requested trades.

        Args:
            actions: per-stock values in ``[-1, 1]``; scaled by ``max_stock``
                and truncated to whole shares.

        Returns:
            ``(state, reward, done, truncated, info)``. ``truncated`` is
            always ``False`` and ``info`` is an empty dict. On the final step
            the returned reward is the accumulated discounted reward.
        """
        actions = (np.asarray(actions) * self.max_stock).astype(int)

        self.day += 1
        price = self.price_ary[self.day]
        self.stocks_cool_down += 1

        if self.turbulence_bool[self.day] == 0:
            # Actions whose magnitude does not exceed this are ignored.
            min_action = int(self.max_stock * self.min_stock_rate)

            for index in np.where(actions < -min_action)[0]:  # sell_index
                if price[index] > 0:  # sell only if the price is > 0
                    sell_num_shares = min(self.stocks[index], -actions[index])
                    self.stocks[index] -= sell_num_shares
                    self.amount += (
                        price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0

            for index in np.where(actions > min_action)[0]:  # buy_index
                # Buy only if the price is > 0 (no missing data on this date).
                if price[index] > 0:
                    # Affordability must include the transaction cost,
                    # otherwise a buy can overdraw cash. Clamp at zero so that
                    # an already negative balance never turns a "buy" into a
                    # reduction of holdings.
                    unit_cost = price[index] * (1 + self.buy_cost_pct)
                    affordable = max(self.amount // unit_cost, 0)
                    buy_num_shares = min(affordable, actions[index])
                    self.stocks[index] += buy_num_shares
                    self.amount -= unit_cost * buy_num_shares
                    self.stocks_cool_down[index] = 0

        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cool_down[:] = 0

        state = self.get_state(price)
        total_asset = self.amount + (self.stocks * price).sum()
        reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset

        self.gamma_reward = self.gamma_reward * self.gamma + reward
        done = self.day == self.max_step
        if done:
            reward = self.gamma_reward
            self.episode_return = total_asset / self.initial_total_asset

        return state, reward, done, False, {}

    def get_state(self, price):
        """Assemble the flat observation vector for the current day.

        Args:
            price: price row for ``self.day``.

        Returns:
            1-D array: scaled cash, turbulence features, scaled prices,
            scaled holdings, cool-down counters and technical indicators.
        """
        amount = np.array(self.amount * (2**-12), dtype=np.float32)
        scale = np.array(2**-6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                self.turbulence_ary[self.day],
                self.turbulence_bool[self.day],
                price * scale,
                self.stocks * scale,
                self.stocks_cool_down,
                self.tech_ary[self.day],
            )
        )  # state.astype(np.float32)

    @staticmethod
    def sigmoid_sign(ary, thresh):
        """Squash ``ary`` into ``(-thresh / 2, thresh / 2)`` with a sigmoid.

        Computes ``(1 / (1 + exp(-e * ary / thresh)) - 0.5) * thresh``.
        """

        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh