# Source: blob/master/22_deep_reinforcement_learning/trading_env.py
"""1The MIT License (MIT)23Copyright (c) 2016 Tito Ingargiola4Copyright (c) 2019 Stefan Jansen56Permission is hereby granted, free of charge, to any person obtaining a copy7of this software and associated documentation files (the "Software"), to deal8in the Software without restriction, including without limitation the rights9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell10copies of the Software, and to permit persons to whom the Software is11furnished to do so, subject to the following conditions:1213The above copyright notice and this permission notice shall be included in all14copies or substantial portions of the Software.1516THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE22SOFTWARE.23"""2425import logging26import tempfile2728import gym29import numpy as np30import pandas as pd31from gym import spaces32from gym.utils import seeding33from sklearn.preprocessing import scale34import talib3536logging.basicConfig()37log = logging.getLogger(__name__)38log.setLevel(logging.INFO)39log.info('%s logger started.', __name__)404142class DataSource:43"""44Data source for TradingEnvironment4546Loads & preprocesses daily price & volume data47Provides data for each new episode.48Stocks with longest history:4950ticker # obs51KO 1415552GE 1415553BA 1415554CAT 1415555DIS 141555657"""5859def __init__(self, trading_days=252, ticker='AAPL', normalize=True):60self.ticker = ticker61self.trading_days = trading_days62self.normalize = normalize63self.data = self.load_data()64self.preprocess_data()65self.min_values = self.data.min()66self.max_values = 
self.data.max()67self.step = 068self.offset = None6970def load_data(self):71log.info('loading data for {}...'.format(self.ticker))72idx = pd.IndexSlice73with pd.HDFStore('../data/assets.h5') as store:74df = (store['quandl/wiki/prices']75.loc[idx[:, self.ticker],76['adj_close', 'adj_volume', 'adj_low', 'adj_high']]77.dropna()78.sort_index())79df.columns = ['close', 'volume', 'low', 'high']80log.info('got data for {}...'.format(self.ticker))81return df8283def preprocess_data(self):84"""calculate returns and percentiles, then removes missing values"""8586self.data['returns'] = self.data.close.pct_change()87self.data['ret_2'] = self.data.close.pct_change(2)88self.data['ret_5'] = self.data.close.pct_change(5)89self.data['ret_10'] = self.data.close.pct_change(10)90self.data['ret_21'] = self.data.close.pct_change(21)91self.data['rsi'] = talib.STOCHRSI(self.data.close)[1]92self.data['macd'] = talib.MACD(self.data.close)[1]93self.data['atr'] = talib.ATR(self.data.high, self.data.low, self.data.close)9495slowk, slowd = talib.STOCH(self.data.high, self.data.low, self.data.close)96self.data['stoch'] = slowd - slowk97self.data['atr'] = talib.ATR(self.data.high, self.data.low, self.data.close)98self.data['ultosc'] = talib.ULTOSC(self.data.high, self.data.low, self.data.close)99self.data = (self.data.replace((np.inf, -np.inf), np.nan)100.drop(['high', 'low', 'close', 'volume'], axis=1)101.dropna())102103r = self.data.returns.copy()104if self.normalize:105self.data = pd.DataFrame(scale(self.data),106columns=self.data.columns,107index=self.data.index)108features = self.data.columns.drop('returns')109self.data['returns'] = r # don't scale returns110self.data = self.data.loc[:, ['returns'] + list(features)]111log.info(self.data.info())112113def reset(self):114"""Provides starting index for time series and resets step"""115high = len(self.data.index) - self.trading_days116self.offset = np.random.randint(low=0, high=high)117self.step = 0118119def take_step(self):120"""Returns data for 
current trading day and done signal"""121obs = self.data.iloc[self.offset + self.step].values122self.step += 1123done = self.step > self.trading_days124return obs, done125126127class TradingSimulator:128""" Implements core trading simulator for single-instrument univ """129130def __init__(self, steps, trading_cost_bps, time_cost_bps):131# invariant for object life132self.trading_cost_bps = trading_cost_bps133self.time_cost_bps = time_cost_bps134self.steps = steps135136# change every step137self.step = 0138self.actions = np.zeros(self.steps)139self.navs = np.ones(self.steps)140self.market_navs = np.ones(self.steps)141self.strategy_returns = np.ones(self.steps)142self.positions = np.zeros(self.steps)143self.costs = np.zeros(self.steps)144self.trades = np.zeros(self.steps)145self.market_returns = np.zeros(self.steps)146147def reset(self):148self.step = 0149self.actions.fill(0)150self.navs.fill(1)151self.market_navs.fill(1)152self.strategy_returns.fill(0)153self.positions.fill(0)154self.costs.fill(0)155self.trades.fill(0)156self.market_returns.fill(0)157158def take_step(self, action, market_return):159""" Calculates NAVs, trading costs and reward160based on an action and latest market return161and returns the reward and a summary of the day's activity. 
"""162163start_position = self.positions[max(0, self.step - 1)]164start_nav = self.navs[max(0, self.step - 1)]165start_market_nav = self.market_navs[max(0, self.step - 1)]166self.market_returns[self.step] = market_return167self.actions[self.step] = action168169end_position = action - 1 # short, neutral, long170n_trades = end_position - start_position171self.positions[self.step] = end_position172self.trades[self.step] = n_trades173174# roughly value based since starting NAV = 1175trade_costs = abs(n_trades) * self.trading_cost_bps176time_cost = 0 if n_trades else self.time_cost_bps177self.costs[self.step] = trade_costs + time_cost178reward = start_position * market_return - self.costs[max(0, self.step-1)]179self.strategy_returns[self.step] = reward180181if self.step != 0:182self.navs[self.step] = start_nav * (1 + self.strategy_returns[self.step])183self.market_navs[self.step] = start_market_nav * (1 + self.market_returns[self.step])184185info = {'reward': reward,186'nav' : self.navs[self.step],187'costs' : self.costs[self.step]}188189self.step += 1190return reward, info191192def result(self):193"""returns current state as pd.DataFrame """194return pd.DataFrame({'action' : self.actions, # current action195'nav' : self.navs, # starting Net Asset Value (NAV)196'market_nav' : self.market_navs,197'market_return' : self.market_returns,198'strategy_return': self.strategy_returns,199'position' : self.positions, # eod position200'cost' : self.costs, # eod costs201'trade' : self.trades}) # eod trade)202203204class TradingEnvironment(gym.Env):205"""A simple trading environment for reinforcement learning.206207Provides daily observations for a stock price series208An episode is defined as a sequence of 252 trading days with random start209Each day is a 'step' that allows the agent to choose one of three actions:210- 0: SHORT211- 1: HOLD212- 2: LONG213214Trading has an optional cost (default: 10bps) of the change in position value.215Going from short to long implies two 
trades.216Not trading also incurs a default time cost of 1bps per step.217218An episode begins with a starting Net Asset Value (NAV) of 1 unit of cash.219If the NAV drops to 0, the episode ends with a loss.220If the NAV hits 2.0, the agent wins.221222The trading simulator tracks a buy-and-hold strategy as benchmark.223"""224metadata = {'render.modes': ['human']}225226def __init__(self,227trading_days=252,228trading_cost_bps=1e-3,229time_cost_bps=1e-4,230ticker='AAPL'):231self.trading_days = trading_days232self.trading_cost_bps = trading_cost_bps233self.ticker = ticker234self.time_cost_bps = time_cost_bps235self.data_source = DataSource(trading_days=self.trading_days,236ticker=ticker)237self.simulator = TradingSimulator(steps=self.trading_days,238trading_cost_bps=self.trading_cost_bps,239time_cost_bps=self.time_cost_bps)240self.action_space = spaces.Discrete(3)241self.observation_space = spaces.Box(self.data_source.min_values,242self.data_source.max_values)243self.reset()244245def seed(self, seed=None):246self.np_random, seed = seeding.np_random(seed)247return [seed]248249def step(self, action):250"""Returns state observation, reward, done and info"""251assert self.action_space.contains(action), '{} {} invalid'.format(action, type(action))252observation, done = self.data_source.take_step()253reward, info = self.simulator.take_step(action=action,254market_return=observation[0])255return observation, reward, done, info256257def reset(self):258"""Resets DataSource and TradingSimulator; returns first observation"""259self.data_source.reset()260self.simulator.reset()261return self.data_source.take_step()[0]262263# TODO264def render(self, mode='human'):265"""Not implemented"""266pass267268269