# Source: blob/master/22_deep_reinforcement_learning/trading_env.py
"""1The MIT License (MIT)23Copyright (c) 2016 Tito Ingargiola4Copyright (c) 2019 Stefan Jansen56Permission is hereby granted, free of charge, to any person obtaining a copy7of this software and associated documentation files (the "Software"), to deal8in the Software without restriction, including without limitation the rights9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell10copies of the Software, and to permit persons to whom the Software is11furnished to do so, subject to the following conditions:1213The above copyright notice and this permission notice shall be included in all14copies or substantial portions of the Software.1516THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE22SOFTWARE.23"""2425import logging26import tempfile2728import gym29import numpy as np30import pandas as pd31from gym import spaces32from gym.utils import seeding33from sklearn.preprocessing import scale34import talib3536logging.basicConfig()37log = logging.getLogger(__name__)38log.setLevel(logging.INFO)39log.info('%s logger started.', __name__)404142class DataSource:43"""44Data source for TradingEnvironment4546Loads & preprocesses daily price & volume data47Provides data for each new episode.48Stocks with longest history:4950ticker # obs51KO 1415552GE 1415553BA 1415554CAT 1415555DIS 141555657"""5859def __init__(self, trading_days=252, ticker='AAPL', normalize=True):60self.ticker = ticker61self.trading_days = trading_days62self.normalize = normalize63self.data = self.load_data()64self.preprocess_data()65self.min_values = self.data.min()66self.max_values = 
self.data.max()67self.step = 068self.offset = None6970def load_data(self):71log.info('loading data for {}...'.format(self.ticker))72idx = pd.IndexSlice73with pd.HDFStore('../data/assets.h5') as store:74df = (store['quandl/wiki/prices']75.loc[idx[:, self.ticker],76['adj_close', 'adj_volume', 'adj_low', 'adj_high']]77.dropna()78.sort_index())79df.columns = ['close', 'volume', 'low', 'high']80log.info('got data for {}...'.format(self.ticker))81return df8283def preprocess_data(self):84"""calculate returns and percentiles, then removes missing values"""8586self.data['returns'] = self.data.close.pct_change()87self.data['ret_2'] = self.data.close.pct_change(2)88self.data['ret_5'] = self.data.close.pct_change(5)89self.data['ret_10'] = self.data.close.pct_change(10)90self.data['ret_21'] = self.data.close.pct_change(21)91self.data['rsi'] = talib.STOCHRSI(self.data.close)[1]92self.data['macd'] = talib.MACD(self.data.close)[1]93self.data['atr'] = talib.ATR(self.data.high, self.data.low, self.data.close)9495slowk, slowd = talib.STOCH(self.data.high, self.data.low, self.data.close)96self.data['stoch'] = slowd - slowk97self.data['atr'] = talib.ATR(self.data.high, self.data.low, self.data.close)98self.data['ultosc'] = talib.ULTOSC(self.data.high, self.data.low, self.data.close)99self.data = (self.data.replace((np.inf, -np.inf), np.nan)100.drop(['high', 'low', 'close', 'volume'], axis=1)101.dropna())102103r = self.data.returns.copy()104if self.normalize:105self.data = pd.DataFrame(scale(self.data),106columns=self.data.columns,107index=self.data.index)108features = self.data.columns.drop('returns')109self.data['returns'] = r # don't scale returns110self.data = self.data.loc[:, ['returns'] + list(features)]111log.info(self.data.info())112113def reset(self):114"""Provides starting index for time series and resets step"""115high = len(self.data.index) - self.trading_days116self.offset = np.random.randint(low=0, high=high)117self.step = 0118119def take_step(self):120"""Returns data for 
current trading day and done signal"""121obs = self.data.iloc[self.offset + self.step].values122self.step += 1123done = self.step > self.trading_days124return obs, done125126127class TradingSimulator:128""" Implements core trading simulator for single-instrument univ """129130def __init__(self, steps, trading_cost_bps, time_cost_bps):131# invariant for object life132self.trading_cost_bps = trading_cost_bps133self.time_cost_bps = time_cost_bps134self.steps = steps135136# change every step137self.step = 0138self.actions = np.zeros(self.steps)139self.navs = np.ones(self.steps)140self.market_navs = np.ones(self.steps)141self.strategy_returns = np.ones(self.steps)142self.positions = np.zeros(self.steps)143self.costs = np.zeros(self.steps)144self.trades = np.zeros(self.steps)145self.market_returns = np.zeros(self.steps)146147def reset(self):148self.step = 0149self.actions.fill(0)150self.navs.fill(1)151self.market_navs.fill(1)152self.strategy_returns.fill(0)153self.positions.fill(0)154self.costs.fill(0)155self.trades.fill(0)156self.market_returns.fill(0)157158def take_step(self, action, market_return):159""" Calculates NAVs, trading costs and reward160based on an action and latest market return161and returns the reward and a summary of the day's activity. 
"""162163start_position = self.positions[max(0, self.step - 1)]164start_nav = self.navs[max(0, self.step - 1)]165start_market_nav = self.market_navs[max(0, self.step - 1)]166self.market_returns[self.step] = market_return167self.actions[self.step] = action168169end_position = action - 1 # short, neutral, long170n_trades = end_position - start_position171self.positions[self.step] = end_position172self.trades[self.step] = n_trades173174# roughly value based since starting NAV = 1175trade_costs = abs(n_trades) * self.trading_cost_bps176time_cost = 0 if n_trades else self.time_cost_bps177self.costs[self.step] = trade_costs + time_cost178reward = start_position * market_return - self.costs[max(0, self.step-1)]179self.strategy_returns[self.step] = reward180181if self.step != 0:182self.navs[self.step] = start_nav * (1 + self.strategy_returns[self.step])183self.market_navs[self.step] = start_market_nav * (1 + self.market_returns[self.step])184185info = {'reward': reward,186'nav' : self.navs[self.step],187'costs' : self.costs[self.step]}188189self.step += 1190return reward, info191192def result(self):193"""returns current state as pd.DataFrame """194return pd.DataFrame({'action' : self.actions, # current action195'nav' : self.navs, # starting Net Asset Value (NAV)196'market_nav' : self.market_navs,197'market_return' : self.market_returns,198'strategy_return': self.strategy_returns,199'position' : self.positions, # eod position200'cost' : self.costs, # eod costs201'trade' : self.trades}) # eod trade)202203204class TradingEnvironment(gym.Env):205"""A simple trading environment for reinforcement learning.206207Provides daily observations for a stock price series208An episode is defined as a sequence of 252 trading days with random start209Each day is a 'step' that allows the agent to choose one of three actions:210- 0: SHORT211- 1: HOLD212- 2: LONG213214Trading has an optional cost (default: 10bps) of the change in position value.215Going from short to long implies two 
trades.216Not trading also incurs a default time cost of 1bps per step.217218An episode begins with a starting Net Asset Value (NAV) of 1 unit of cash.219If the NAV drops to 0, the episode ends with a loss.220If the NAV hits 2.0, the agent wins.221222The trading simulator tracks a buy-and-hold strategy as benchmark.223"""224metadata = {'render.modes': ['human']}225226def __init__(self,227trading_days=252,228trading_cost_bps=1e-3,229time_cost_bps=1e-4,230ticker='AAPL'):231self.trading_days = trading_days232self.trading_cost_bps = trading_cost_bps233self.ticker = ticker234self.time_cost_bps = time_cost_bps235self.data_source = DataSource(trading_days=self.trading_days,236ticker=ticker)237self.simulator = TradingSimulator(steps=self.trading_days,238trading_cost_bps=self.trading_cost_bps,239time_cost_bps=self.time_cost_bps)240self.action_space = spaces.Discrete(3)241self.observation_space = spaces.Box(self.data_source.min_values,242self.data_source.max_values)243self.reset()244245def seed(self, seed=None):246self.np_random, seed = seeding.np_random(seed)247return [seed]248249def step(self, action):250"""Returns state observation, reward, done and info"""251assert self.action_space.contains(action), '{} {} invalid'.format(action, type(action))252observation, done = self.data_source.take_step()253reward, info = self.simulator.take_step(action=action,254market_return=observation[0])255return observation, reward, done, info256257def reset(self):258"""Resets DataSource and TradingSimulator; returns first observation"""259self.data_source.reset()260self.simulator.reset()261return self.data_source.take_step()[0]262263# TODO264def render(self, mode='human'):265"""Not implemented"""266pass267268269