Path: blob/master/finrl/meta/env_portfolio_allocation/env_portfolio.py
from __future__ import annotations

import gymnasium as gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gymnasium import spaces
from gymnasium.utils import seeding
from stable_baselines3.common.vec_env import DummyVecEnv

matplotlib.use("Agg")


class StockPortfolioEnv(gym.Env):
    """A portfolio allocation environment for Gymnasium.

    Attributes
    ----------
    df: DataFrame
        input data
    stock_dim : int
        number of unique stocks
    hmax : int
        maximum number of shares to trade
    initial_amount : int
        starting cash
    transaction_cost_pct: float
        transaction cost percentage per trade
    reward_scaling: float
        scaling factor for the reward, useful for training
    state_space: int
        the dimension of input features
    action_space: int
        equals stock dimension
    tech_indicator_list: list
        a list of technical indicator names
    turbulence_threshold: int
        a threshold to control risk aversion
    day: int
        an incrementing index that tracks the current date

    Methods
    -------
    step()
        at each step the agent will return actions, then
        we will calculate the reward, and return the next observation.
    reset()
        reset the environment
    render()
        return the current state
    softmax_normalization()
        convert raw actions to portfolio weights via softmax
    save_asset_memory()
        return account value at each time step
    save_action_memory()
        return actions/positions at each time step

    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        stock_dim,
        hmax,
        initial_amount,
        transaction_cost_pct,
        reward_scaling,
        state_space,
        action_space,
        tech_indicator_list,
        turbulence_threshold=None,
        lookback=252,
        day=0,
    ):
        # super(StockEnv, self).__init__()
        # money = 10 , scope = 1
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        # note: transaction costs are stored but never applied in this environment
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list

        # action_space normalization; shape is self.stock_dim
        self.action_space = spaces.Box(low=0, high=1, shape=(self.action_space,))
        # covariance matrix + technical indicators,
        # e.g. Shape = (34, 30) for 30 stocks and 4 technical indicators
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.state_space + len(self.tech_indicator_list), self.state_space),
        )

        # load data from a pandas dataframe
        self.data = self.df.loc[self.day, :]
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.terminal = False
        self.turbulence_threshold = turbulence_threshold
        # initialize state: initial portfolio return + individual stock return + individual weights
        self.portfolio_value = self.initial_amount

        # memorize portfolio value each step
        self.asset_memory = [self.initial_amount]
        # memorize portfolio return each step
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]

    def step(self, actions):
        # print(self.day)
        self.terminal = self.day >= len(self.df.index.unique()) - 1
        # print(actions)

        if self.terminal:
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ["daily_return"]
            plt.plot(df.daily_return.cumsum(), "r")
            plt.savefig("results/cumulative_reward.png")
            plt.close()

            plt.plot(self.portfolio_return_memory, "r")
            plt.savefig("results/rewards.png")
            plt.close()

            print("=================================")
            print(f"begin_total_asset:{self.asset_memory[0]}")
            print(f"end_total_asset:{self.portfolio_value}")

            df_daily_return = pd.DataFrame(self.portfolio_return_memory)
            df_daily_return.columns = ["daily_return"]
            if df_daily_return["daily_return"].std() != 0:
                sharpe = (
                    (252**0.5)
                    * df_daily_return["daily_return"].mean()
                    / df_daily_return["daily_return"].std()
                )
                print("Sharpe: ", sharpe)
            print("=================================")

            return self.state, self.reward, self.terminal, False, {}

        else:
            # print("Model actions: ", actions)
            # actions are the portfolio weights; normalize them to sum to 1
            # (legacy min-max normalization kept for reference):
            # if (np.array(actions) - np.array(actions).min()).sum() != 0:
            #     norm_actions = (np.array(actions) - np.array(actions).min()) / (
            #         np.array(actions) - np.array(actions).min()
            #     ).sum()
            # else:
            #     norm_actions = actions
            weights = self.softmax_normalization(actions)
            # print("Normalized actions: ", weights)
            self.actions_memory.append(weights)
            last_day_memory = self.data

            # load next state
            self.day += 1
            self.data = self.df.loc[self.day, :]
            self.covs = self.data["cov_list"].values[0]
            self.state = np.append(
                np.array(self.covs),
                [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                axis=0,
            )
            # print(self.state)
            # calculate portfolio return:
            # individual stocks' return * weight
            portfolio_return = sum(
                ((self.data.close.values / last_day_memory.close.values) - 1) * weights
            )
            # update portfolio value
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
            self.portfolio_value = new_portfolio_value

            # save into memory
            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data.date.unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # the reward is the new portfolio value (the end portfolio value at the last step)
            self.reward = new_portfolio_value
            # print("Step reward: ", self.reward)
            # self.reward = self.reward * self.reward_scaling

            return self.state, self.reward, self.terminal, False, {}

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        # seed the underlying RNG per the Gymnasium API
        super().reset(seed=seed)
        self.asset_memory = [self.initial_amount]
        self.day = 0
        self.data = self.df.loc[self.day, :]
        # load states
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.portfolio_value = self.initial_amount
        # self.cost = 0
        # self.trades = 0
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]
        return self.state, {}

    def render(self, mode="human"):
        return self.state

    def softmax_normalization(self, actions):
        # map raw actions to non-negative portfolio weights that sum to 1
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        softmax_output = numerator / denominator
        return softmax_output

    def save_asset_memory(self):
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        # print(len(date_list))
        # print(len(asset_list))
        df_account_value = pd.DataFrame(
            {"date": date_list, "daily_return": portfolio_return}
        )
        return df_account_value

    def save_action_memory(self):
        # date and close price length must match actions length
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ["date"]

        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        # df_actions = pd.DataFrame({'date': date_list, 'actions': action_list})
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs
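
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module). A minimal, self-contained
# example of rolling one episode with random actions, under assumed inputs:
# the synthetic DataFrame below mimics the preprocessed FinRL format (integer
# day index; `date`, `tic`, `close`, a per-day `cov_list` covariance matrix,
# and indicator columns). The `macd` column name is a placeholder for whatever
# technical indicators you actually computed.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import os

    os.makedirs("results", exist_ok=True)  # step() saves plots here at episode end

    # build a tiny synthetic dataset: 3 tickers over 5 trading days
    n_days, tics = 5, ["AAA", "BBB", "CCC"]
    rng = np.random.default_rng(0)
    rows = []
    for day in range(n_days):
        cov = np.cov(rng.normal(size=(len(tics), 20)))  # fake 3x3 covariance matrix
        for tic in tics:
            rows.append(
                {
                    "date": f"2024-01-{day + 1:02d}",
                    "tic": tic,
                    "close": 100.0 + rng.normal(),
                    "macd": rng.normal(),  # placeholder indicator values
                    "cov_list": cov,
                }
            )
    df = pd.DataFrame(rows)
    df.index = df["date"].factorize()[0]  # integer day index, as the env expects

    env = StockPortfolioEnv(
        df=df,
        stock_dim=len(tics),
        hmax=100,
        initial_amount=1_000_000,
        transaction_cost_pct=0.001,
        reward_scaling=1e-4,
        state_space=len(tics),
        action_space=len(tics),
        tech_indicator_list=["macd"],
    )

    # roll one episode with random actions; step() softmax-normalizes them
    state, _ = env.reset()
    done = False
    while not done:
        state, reward, done, truncated, info = env.step(env.action_space.sample())
    print(env.save_asset_memory())

    # for training with stable_baselines3, wrap the env first, e.g.:
    #     env_train, _ = env.get_sb_env()
    #     model = A2C("MlpPolicy", env_train).learn(total_timesteps=10_000)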