Path: blob/master/finrl/meta/env_stock_trading/env_stocktrading_stoploss.py
from __future__ import annotations

import random
import time
from copy import deepcopy

import gym
import matplotlib
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3.common import logger
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv

matplotlib.use("Agg")


class StockTradingEnvStopLoss(gym.Env):
    """
    A stock trading environment for OpenAI gym.

    This environment penalizes the model for exceeding the stop-loss threshold,
    for selling assets below the expected profit percentage, and for not
    maintaining a reserve of cash. This enables the model to trade with high
    confidence and to manage cash reserves in addition to performing trading
    procedures.

    Reward at any step is given as follows:
        r_i = (sum(cash, asset_value) + additional_reward - total_penalty - initial_cash) / initial_cash / days_elapsed
        , where total_penalty = cash_penalty + stop_loss_penalty + low_profit_penalty
                cash_penalty = max(0, sum(cash, asset_value) * cash_penalty_proportion - cash)
                stop_loss_penalty = -1 * dot(holdings, negative_closing_diff_avg_buy)
                low_profit_penalty = -1 * dot(holdings, negative_profit_sell_diff_avg_buy)
                additional_reward = dot(holdings, positive_profit_sell_diff_avg_buy)

    This reward function takes into account a profit/loss ratio constraint and a
    liquidity requirement, as well as long-term accrued rewards. It also forces
    the model to trade only when it is sufficiently confident to do so.

    Parameters:
    state space: {start_cash, <owned_shares>, for s in stocks{<stock.values>}, }
        df (pandas.DataFrame): Dataframe containing data
        buy_cost_pct (float): cost for buying shares
        sell_cost_pct (float): cost for selling shares
        hmax (int): max number of share purchases allowed per asset
        discrete_actions (bool): option to choose whether to discretize the action space or not
        shares_increment (int): increment in which shares can be bought in each trade; orders are rounded to multiples of this value
        stoploss_penalty (float): Maximum loss we can tolerate. Valid value range is between 0 and 1. If x is specified, then the agent will force-sell all holdings of a particular asset if current price < x * avg_buy_price
        profit_loss_ratio (int, float): Expected profit/loss ratio. Only applicable when stoploss_penalty < 1.
        turbulence_threshold (float): Maximum turbulence allowed in the market for purchases to occur. If exceeded, positions are liquidated
        print_verbosity (int): When iterating (step), how often to print stats about the state of the env
        initial_amount (int, float): Amount of cash initially available
        daily_information_cols (list(str)): Columns to use when building the state space from the dataframe. These can be OHLC columns or any other variables such as technical indicators and the turbulence index
        cash_penalty_proportion (int, float): Proportion of total assets to keep as a cash reserve; a penalty is applied when the cash balance falls below this fraction
        patient (bool): option to choose whether to end the episode when we run out of cash, or to simply skip buying until additional cash is available
    action space: <share_dollar_purchases>

    TODO:
        add holdings to memory
        move transactions to after the clip step.

    tests:
        after reset, static strategy should result in same metrics
        given no change in prices, no change in asset values
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        buy_cost_pct=3e-3,
        sell_cost_pct=3e-3,
        date_col_name="date",
        hmax=10,
        discrete_actions=False,
        shares_increment=1,
        stoploss_penalty=0.9,
        profit_loss_ratio=2,
        turbulence_threshold=None,
        print_verbosity=10,
        initial_amount=1e6,
        daily_information_cols=["open", "close", "high", "low", "volume"],
        cache_indicator_data=True,
        cash_penalty_proportion=0.1,
        random_start=True,
        patient=False,
        currency="$",
    ):
        self.df = df
        self.stock_col = "tic"
        self.assets = df[self.stock_col].unique()
        self.dates = df[date_col_name].sort_values().unique()
        self.random_start = random_start
        self.discrete_actions = discrete_actions
        self.patient = patient
        self.currency = currency
        self.df = self.df.set_index(date_col_name)
        self.shares_increment = shares_increment
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.print_verbosity = print_verbosity
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.stoploss_penalty = stoploss_penalty
        self.min_profit_penalty = 1 + profit_loss_ratio * (1 - self.stoploss_penalty)
        self.turbulence_threshold = turbulence_threshold
        self.daily_information_cols = daily_information_cols
        self.state_space = (
            1 + len(self.assets) + len(self.assets) * len(self.daily_information_cols)
        )
        self.action_space = spaces.Box(low=-1, high=1, shape=(len(self.assets),))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.state_space,)
        )
        self.turbulence = 0
        self.episode = -1  # initialize so we can call reset
        self.episode_history = []
        self.printed_header = False
        self.cache_indicator_data = cache_indicator_data
        self.cached_data = None
        self.cash_penalty_proportion = cash_penalty_proportion
        if self.cache_indicator_data:
            print("caching data")
            self.cached_data = [
                self.get_date_vector(i) for i, _ in enumerate(self.dates)
            ]
            print("data cached!")

    def seed(self, seed=None):
        if seed is None:
            seed = int(round(time.time() * 1000))
        random.seed(seed)

    @property
    def current_step(self):
        return self.date_index - self.starting_point

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        self.seed()
        self.sum_trades = 0
        self.actual_num_trades = 0
        self.closing_diff_avg_buy = np.zeros(len(self.assets))
        self.profit_sell_diff_avg_buy = np.zeros(len(self.assets))
        self.n_buys = np.zeros(len(self.assets))
        self.avg_buy_price = np.zeros(len(self.assets))
        if self.random_start:
            starting_point = random.choice(range(int(len(self.dates) * 0.5)))
            self.starting_point = starting_point
        else:
            self.starting_point = 0
        self.date_index = self.starting_point
        self.turbulence = 0
        self.episode += 1
        self.actions_memory = []
        self.transaction_memory = []
        self.state_memory = []
        self.account_information = {
            "cash": [],
            "asset_value": [],
            "total_assets": [],
            "reward": [],
        }
        init_state = np.array(
            [self.initial_amount]
            + [0] * len(self.assets)
            + self.get_date_vector(self.date_index)
        )
        self.state_memory.append(init_state)
        return init_state

    def get_date_vector(self, date, cols=None):
        if (cols is None) and (self.cached_data is not None):
            return self.cached_data[date]
        else:
            date = self.dates[date]
            if cols is None:
                cols = self.daily_information_cols
            trunc_df = self.df.loc[[date]]
            v = []
            for a in self.assets:
                subset = trunc_df[trunc_df[self.stock_col] == a]
                v += subset.loc[date, cols].tolist()
            assert len(v) == len(self.assets) * len(cols)
            return v

    def return_terminal(self, reason="Last Date", reward=0):
        state = self.state_memory[-1]
        self.log_step(reason=reason, terminal_reward=reward)
        # Add outputs to logger interface
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/GainLoss_pct", (gl_pct - 1) * 100)
        logger.record(
            "environment/total_assets",
            int(self.account_information["total_assets"][-1]),
        )
        reward_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/total_reward_pct", (reward_pct - 1) * 100)
        logger.record("environment/total_trades", self.sum_trades)
        logger.record(
            "environment/actual_num_trades",
            self.actual_num_trades,
        )
        logger.record(
            "environment/avg_daily_trades",
            self.sum_trades / (self.current_step),
        )
        logger.record(
            "environment/avg_daily_trades_per_asset",
            self.sum_trades / (self.current_step) / len(self.assets),
        )
        logger.record("environment/completed_steps", self.current_step)
        logger.record(
            "environment/sum_rewards", np.sum(self.account_information["reward"])
        )
        logger.record(
            "environment/cash_proportion",
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1],
        )
        return state, reward, True, {}

    def log_step(self, reason, terminal_reward=None):
        if terminal_reward is None:
            terminal_reward = self.account_information["reward"][-1]
        cash_pct = (
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1]
        )
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        rec = [
            self.episode,
            self.date_index - self.starting_point,
            reason,
            f"{self.currency}{'{:0,.0f}'.format(float(self.account_information['cash'][-1]))}",
            f"{self.currency}{'{:0,.0f}'.format(float(self.account_information['total_assets'][-1]))}",
            f"{terminal_reward*100:0.5f}%",
            f"{(gl_pct - 1)*100:0.5f}%",
            f"{cash_pct*100:0.2f}%",
        ]
        self.episode_history.append(rec)
        print(self.template.format(*rec))

    def log_header(self):
        # column widths: 4, 4, 15, 15, 15, 10, 10, 10
        self.template = "{0:4}|{1:4}|{2:15}|{3:15}|{4:15}|{5:10}|{6:10}|{7:10}"
        print(
            self.template.format(
                "EPISODE",
                "STEPS",
                "TERMINAL_REASON",
                "CASH",
                "TOT_ASSETS",
                "TERMINAL_REWARD_unsc",
                "GAINLOSS_PCT",
                "CASH_PROPORTION",
            )
        )
        self.printed_header = True

    def get_reward(self):
        if self.current_step == 0:
            return 0
        else:
            total_assets = self.account_information["total_assets"][-1]
            cash = self.account_information["cash"][-1]
            holdings = self.state_memory[-1][1 : len(self.assets) + 1]
            neg_closing_diff_avg_buy = np.clip(self.closing_diff_avg_buy, -np.inf, 0)
            neg_profit_sell_diff_avg_buy = np.clip(
                self.profit_sell_diff_avg_buy, -np.inf, 0
            )
            pos_profit_sell_diff_avg_buy = np.clip(
                self.profit_sell_diff_avg_buy, 0, np.inf
            )

            cash_penalty = max(0, (total_assets * self.cash_penalty_proportion - cash))
            if self.current_step > 1:
                prev_holdings = self.state_memory[-2][1 : len(self.assets) + 1]
                stop_loss_penalty = -1 * np.dot(
                    np.array(prev_holdings), neg_closing_diff_avg_buy
                )
            else:
                stop_loss_penalty = 0
            low_profit_penalty = -1 * np.dot(
                np.array(holdings), neg_profit_sell_diff_avg_buy
            )
            total_penalty = cash_penalty + stop_loss_penalty + low_profit_penalty

            additional_reward = np.dot(np.array(holdings), pos_profit_sell_diff_avg_buy)

            reward = (
                (total_assets - total_penalty + additional_reward) / self.initial_amount
            ) - 1
            reward /= self.current_step

            return reward

    def step(self, actions):
        # let's just log what we're doing in terms of max actions at each step.
        self.sum_trades += np.sum(np.abs(actions))
        # print header only first time
        if self.printed_header is False:
            self.log_header()
        # print if it's time.
        if (self.current_step + 1) % self.print_verbosity == 0:
            self.log_step(reason="update")
        # if we're at the end
        if self.date_index == len(self.dates) - 1:
            # if we hit the end, set reward to total gains (or losses)
            return self.return_terminal(reward=self.get_reward())
        else:
            # compute value of cash + assets
            begin_cash = self.state_memory[-1][0]
            holdings = self.state_memory[-1][1 : len(self.assets) + 1]
            assert min(holdings) >= 0
            closings = np.array(self.get_date_vector(self.date_index, cols=["close"]))
            asset_value = np.dot(holdings, closings)
            # compute reward for the current state (see get_reward)
            reward = self.get_reward()
            # log the values of cash, assets, and total assets
            self.account_information["cash"].append(begin_cash)
            self.account_information["asset_value"].append(asset_value)
            self.account_information["total_assets"].append(begin_cash + asset_value)
            self.account_information["reward"].append(reward)

            # multiply action values by our scalar multiplier and save
            actions = actions * self.hmax
            self.actions_memory.append(
                actions * closings
            )  # capture what the model's trying to do
            # buy/sell only if the price is > 0 (no missing data for this particular date)
            actions = np.where(closings > 0, actions, 0)
            if self.turbulence_threshold is not None:
                # if turbulence goes over threshold, just clear out all positions
                if self.turbulence >= self.turbulence_threshold:
                    actions = -(np.array(holdings) * closings)
                    self.log_step(reason="TURBULENCE")
            # convert dollar amounts into numbers of shares
            if self.discrete_actions:
                # convert to integers because we can't buy fractions of shares
                actions = np.where(closings > 0, actions // closings, 0)
                actions = actions.astype(int)
                # round actions down to the nearest multiple of shares_increment
                actions = np.where(
                    actions >= 0,
                    (actions // self.shares_increment) * self.shares_increment,
                    ((actions + self.shares_increment) // self.shares_increment)
                    * self.shares_increment,
                )
            else:
                actions = np.where(closings > 0, actions / closings, 0)

            # clip actions so we can't sell more assets than we hold
            actions = np.maximum(actions, -np.array(holdings))

            self.closing_diff_avg_buy = closings - (
                self.stoploss_penalty * self.avg_buy_price
            )
            if begin_cash >= self.stoploss_penalty * self.initial_amount:
                # clear out position if stop-loss criteria is met
                actions = np.where(
                    self.closing_diff_avg_buy < 0, -np.array(holdings), actions
                )

                if any(np.clip(self.closing_diff_avg_buy, -np.inf, 0) < 0):
                    self.log_step(reason="STOP LOSS")

            # compute our proceeds from sells, and add to cash
            sells = -np.clip(actions, -np.inf, 0)
            proceeds = np.dot(sells, closings)
            costs = proceeds * self.sell_cost_pct
            coh = begin_cash + proceeds
            # compute the cost of our buys
            buys = np.clip(actions, 0, np.inf)
            spend = np.dot(buys, closings)
            costs += spend * self.buy_cost_pct
            # if we run out of cash...
            if (spend + costs) > coh:
                if self.patient:
                    # ... just don't buy anything until we get additional cash
                    self.log_step(reason="CASH SHORTAGE")
                    actions = np.where(actions > 0, 0, actions)
                    spend = 0
                    costs = 0
                else:
                    # ... end the cycle and penalize
                    return self.return_terminal(
                        reason="CASH SHORTAGE", reward=self.get_reward()
                    )

            self.transaction_memory.append(actions)  # capture what the model could actually do

            # get profitable sell actions
            sell_closing_price = np.where(
                sells > 0, closings, 0
            )  # get closing price of assets that we sold
            profit_sell = np.where(
                sell_closing_price - self.avg_buy_price > 0, 1, 0
            )  # mark the ones which are profitable

            self.profit_sell_diff_avg_buy = np.where(
                profit_sell == 1,
                closings - (self.min_profit_penalty * self.avg_buy_price),
                0,
            )

            if any(np.clip(self.profit_sell_diff_avg_buy, -np.inf, 0) < 0):
                self.log_step(reason="LOW PROFIT")
            else:
                if any(np.clip(self.profit_sell_diff_avg_buy, 0, np.inf) > 0):
                    self.log_step(reason="HIGH PROFIT")

            # verify we didn't do anything impossible here
            assert (spend + costs) <= coh

            # log actual total trades we did up to current step
            self.actual_num_trades = np.sum(np.abs(np.sign(actions)))

            # update our holdings
            coh = coh - spend - costs
            holdings_updated = holdings + actions

            # Update average buy price
            buys = np.sign(buys)
            self.n_buys += buys
            self.avg_buy_price = np.where(
                buys > 0,
                self.avg_buy_price + ((closings - self.avg_buy_price) / self.n_buys),
                self.avg_buy_price,
            )  # incremental average

            # reset to zero when we no longer have any holdings
            self.n_buys = np.where(holdings_updated > 0, self.n_buys, 0)
            self.avg_buy_price = np.where(holdings_updated > 0, self.avg_buy_price, 0)

            self.date_index += 1
            if self.turbulence_threshold is not None:
                self.turbulence = self.get_date_vector(
                    self.date_index, cols=["turbulence"]
                )[0]

            # Update State
            state = (
                [coh] + list(holdings_updated) + self.get_date_vector(self.date_index)
            )
            self.state_memory.append(state)

            return state, reward, False, {}

    def get_sb_env(self):
        def get_self():
            return deepcopy(self)

        e = DummyVecEnv([get_self])
        obs = e.reset()
        return e, obs

    def get_multiproc_env(self, n=10):
        def get_self():
            return deepcopy(self)

        e = SubprocVecEnv([get_self for _ in range(n)], start_method="fork")
        obs = e.reset()
        return e, obs

    def save_asset_memory(self):
        if self.current_step == 0:
            return None
        else:
            self.account_information["date"] = self.dates[
                -len(self.account_information["cash"]) :
            ]
            return pd.DataFrame(self.account_information)

    def save_action_memory(self):
        if self.current_step == 0:
            return None
        else:
            return pd.DataFrame(
                {
                    "date": self.dates[-len(self.account_information["cash"]) :],
                    "actions": self.actions_memory,
                    "transactions": self.transaction_memory,
                }
            )
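

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal example of wiring
# this environment into stable-baselines3. It assumes `df` is a long-format
# DataFrame with one row per (date, tic) pair containing a "date" column, a
# "tic" column, and every column named in daily_information_cols; the file
# name "train_data.csv" and the PPO hyperparameters are placeholders, and an
# SB3/gym version combination compatible with the legacy gym API used above
# is assumed.
if __name__ == "__main__":
    from stable_baselines3 import PPO

    df = pd.read_csv("train_data.csv")  # hypothetical input file
    env = StockTradingEnvStopLoss(
        df=df,
        daily_information_cols=["open", "close", "high", "low", "volume"],
        stoploss_penalty=0.9,
        profit_loss_ratio=2,
        cash_penalty_proportion=0.1,
        patient=True,
    )
    # get_sb_env wraps a deep copy of the env in a DummyVecEnv for SB3
    vec_env, _ = env.get_sb_env()
    model = PPO("MlpPolicy", vec_env, verbose=1)
    model.learn(total_timesteps=10_000)

    # account history is tracked on the copy held inside the vec env
    history = vec_env.envs[0].save_asset_memory()
    if history is not None:
        print(history.tail())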