Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
packtpublishing
GitHub Repository: packtpublishing/machine-learning-for-algorithmic-trading-second-edition
Path: blob/master/22_deep_reinforcement_learning/trading_env.py
2923 views
1
"""
2
The MIT License (MIT)
3
4
Copyright (c) 2016 Tito Ingargiola
5
Copyright (c) 2019 Stefan Jansen
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in all
15
copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
SOFTWARE.
24
"""
25
26
import logging
27
import tempfile
28
29
import gym
30
import numpy as np
31
import pandas as pd
32
from gym import spaces
33
from gym.utils import seeding
34
from sklearn.preprocessing import scale
35
import talib
36
37
logging.basicConfig()
38
log = logging.getLogger(__name__)
39
log.setLevel(logging.INFO)
40
log.info('%s logger started.', __name__)
41
42
43
class DataSource:
    """Data source for TradingEnvironment.

    Loads & preprocesses daily price & volume data from the local HDF5 store
    and provides one observation per step for each new episode.

    Stocks with longest history:

    ticker  # obs
    KO      14155
    GE      14155
    BA      14155
    CAT     14155
    DIS     14155
    """

    def __init__(self, trading_days=252, ticker='AAPL', normalize=True):
        """Load and preprocess data for `ticker`.

        :param trading_days: episode length in days
        :param ticker: stock symbol to load from the Quandl Wiki dataset
        :param normalize: whether to z-score the features (returns are kept raw)
        """
        self.ticker = ticker
        self.trading_days = trading_days
        self.normalize = normalize
        self.data = self.load_data()
        self.preprocess_data()
        # per-feature bounds; the env uses these to build its observation space
        self.min_values = self.data.min()
        self.max_values = self.data.max()
        self.step = 0
        self.offset = None  # random episode start index, set by reset()

    def load_data(self):
        """Load adjusted OHLCV series for the ticker from ../data/assets.h5."""
        # lazy %-style args so formatting only happens if the record is emitted
        log.info('loading data for %s...', self.ticker)
        idx = pd.IndexSlice
        with pd.HDFStore('../data/assets.h5') as store:
            df = (store['quandl/wiki/prices']
                  .loc[idx[:, self.ticker],
                       ['adj_close', 'adj_volume', 'adj_low', 'adj_high']]
                  .dropna()
                  .sort_index())
        df.columns = ['close', 'volume', 'low', 'high']
        log.info('got data for %s...', self.ticker)
        return df

    def preprocess_data(self):
        """Calculate returns and technical indicators, then remove missing values."""
        # multi-horizon simple returns
        self.data['returns'] = self.data.close.pct_change()
        self.data['ret_2'] = self.data.close.pct_change(2)
        self.data['ret_5'] = self.data.close.pct_change(5)
        self.data['ret_10'] = self.data.close.pct_change(10)
        self.data['ret_21'] = self.data.close.pct_change(21)
        # NOTE(review): index [1] selects the second output of each talib call
        # (STOCHRSI fastd, MACD signal line) — presumably intentional; confirm.
        self.data['rsi'] = talib.STOCHRSI(self.data.close)[1]
        self.data['macd'] = talib.MACD(self.data.close)[1]
        self.data['atr'] = talib.ATR(self.data.high, self.data.low, self.data.close)

        slowk, slowd = talib.STOCH(self.data.high, self.data.low, self.data.close)
        self.data['stoch'] = slowd - slowk
        # (removed a duplicate ATR computation that overwrote the identical column)
        self.data['ultosc'] = talib.ULTOSC(self.data.high, self.data.low, self.data.close)
        self.data = (self.data.replace((np.inf, -np.inf), np.nan)
                     .drop(['high', 'low', 'close', 'volume'], axis=1)
                     .dropna())

        r = self.data.returns.copy()
        if self.normalize:
            self.data = pd.DataFrame(scale(self.data),
                                     columns=self.data.columns,
                                     index=self.data.index)
        features = self.data.columns.drop('returns')
        self.data['returns'] = r  # don't scale returns
        # keep raw returns first so the env can read them as observation[0]
        self.data = self.data.loc[:, ['returns'] + list(features)]
        log.info(self.data.info())

    def reset(self):
        """Provides starting index for time series and resets step."""
        # any offset in [0, high) leaves room for a full episode of data
        high = len(self.data.index) - self.trading_days
        self.offset = np.random.randint(low=0, high=high)
        self.step = 0

    def take_step(self):
        """Returns data for current trading day and done signal."""
        obs = self.data.iloc[self.offset + self.step].values
        self.step += 1
        done = self.step > self.trading_days
        return obs, done
126
127
128
class TradingSimulator:
    """Implements core trading simulator for a single-instrument universe.

    Tracks positions, NAVs, costs and per-step rewards for one episode, plus
    a buy-and-hold benchmark via the market NAV series.
    """

    def __init__(self, steps, trading_cost_bps, time_cost_bps):
        """
        :param steps: number of steps (trading days) per episode
        :param trading_cost_bps: cost per unit of position change
        :param time_cost_bps: cost charged on steps with no trade
        """
        # invariant for object life
        self.trading_cost_bps = trading_cost_bps
        self.time_cost_bps = time_cost_bps
        self.steps = steps

        # change every step
        self.step = 0
        self.actions = np.zeros(self.steps)
        self.navs = np.ones(self.steps)
        self.market_navs = np.ones(self.steps)
        # zeros, not ones: returns start at 0, consistent with reset() below
        self.strategy_returns = np.zeros(self.steps)
        self.positions = np.zeros(self.steps)
        self.costs = np.zeros(self.steps)
        self.trades = np.zeros(self.steps)
        self.market_returns = np.zeros(self.steps)

    def reset(self):
        """Reset all per-episode state in place for a new episode."""
        self.step = 0
        self.actions.fill(0)
        self.navs.fill(1)
        self.market_navs.fill(1)
        self.strategy_returns.fill(0)
        self.positions.fill(0)
        self.costs.fill(0)
        self.trades.fill(0)
        self.market_returns.fill(0)

    def take_step(self, action, market_return):
        """Calculates NAVs, trading costs and reward
        based on an action and latest market return
        and returns the reward and a summary of the day's activity.

        :param action: 0 (short), 1 (neutral) or 2 (long)
        :param market_return: the market return realized this step
        :returns: (reward, info dict with 'reward', 'nav', 'costs')
        """
        start_position = self.positions[max(0, self.step - 1)]
        start_nav = self.navs[max(0, self.step - 1)]
        start_market_nav = self.market_navs[max(0, self.step - 1)]
        self.market_returns[self.step] = market_return
        self.actions[self.step] = action

        end_position = action - 1  # short, neutral, long -> -1, 0, 1
        n_trades = end_position - start_position
        self.positions[self.step] = end_position
        self.trades[self.step] = n_trades

        # roughly value based since starting NAV = 1
        trade_costs = abs(n_trades) * self.trading_cost_bps
        time_cost = 0 if n_trades else self.time_cost_bps
        self.costs[self.step] = trade_costs + time_cost
        # charge the cost incurred THIS step; the previous code subtracted the
        # prior step's cost, lagging costs by a day and never charging the last one
        reward = start_position * market_return - self.costs[self.step]
        self.strategy_returns[self.step] = reward

        if self.step != 0:
            self.navs[self.step] = start_nav * (1 + self.strategy_returns[self.step])
            self.market_navs[self.step] = start_market_nav * (1 + self.market_returns[self.step])

        info = {'reward': reward,
                'nav'   : self.navs[self.step],
                'costs' : self.costs[self.step]}

        self.step += 1
        return reward, info

    def result(self):
        """Returns current state as pd.DataFrame."""
        return pd.DataFrame({'action'         : self.actions,  # current action
                             'nav'            : self.navs,  # starting Net Asset Value (NAV)
                             'market_nav'     : self.market_navs,
                             'market_return'  : self.market_returns,
                             'strategy_return': self.strategy_returns,
                             'position'       : self.positions,  # eod position
                             'cost'           : self.costs,  # eod costs
                             'trade'          : self.trades})  # eod trade
203
204
205
class TradingEnvironment(gym.Env):
    """A simple trading environment for reinforcement learning.

    Provides daily observations for a stock price series.
    An episode is defined as a sequence of 252 trading days with random start.
    Each day is a 'step' that allows the agent to choose one of three actions:
    - 0: SHORT
    - 1: HOLD
    - 2: LONG

    Trading has an optional cost (default: 10bps) of the change in position value.
    Going from short to long implies two trades.
    Not trading also incurs a default time cost of 1bps per step.

    An episode begins with a starting Net Asset Value (NAV) of 1 unit of cash.
    If the NAV drops to 0, the episode ends with a loss.
    If the NAV hits 2.0, the agent wins.

    The trading simulator tracks a buy-and-hold strategy as benchmark.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 trading_days=252,
                 trading_cost_bps=1e-3,
                 time_cost_bps=1e-4,
                 ticker='AAPL'):
        """Wire up DataSource, TradingSimulator and the gym spaces.

        :param trading_days: episode length in days
        :param trading_cost_bps: cost per unit of position change
        :param time_cost_bps: cost charged on steps with no trade
        :param ticker: stock symbol passed through to DataSource
        """
        self.trading_days = trading_days
        self.trading_cost_bps = trading_cost_bps
        self.ticker = ticker
        self.time_cost_bps = time_cost_bps
        # DataSource must exist before the observation space is built below,
        # since the space bounds come from the loaded data
        self.data_source = DataSource(trading_days=self.trading_days,
                                      ticker=ticker)
        self.simulator = TradingSimulator(steps=self.trading_days,
                                          trading_cost_bps=self.trading_cost_bps,
                                          time_cost_bps=self.time_cost_bps)
        # 3 discrete actions: 0=SHORT, 1=HOLD, 2=LONG
        self.action_space = spaces.Discrete(3)
        # observation bounds come from the preprocessed feature data
        self.observation_space = spaces.Box(self.data_source.min_values,
                                            self.data_source.max_values)
        self.reset()

    def seed(self, seed=None):
        """Seed the env's RNG per the gym convention; returns the seed list.

        NOTE(review): self.np_random is not used by DataSource.reset, which
        calls np.random directly — seeding may not make episodes reproducible;
        verify.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Returns state observation, reward, done and info"""
        assert self.action_space.contains(action), '{} {} invalid'.format(action, type(action))
        observation, done = self.data_source.take_step()
        # observation[0] is the raw (unscaled) daily return — see DataSource,
        # which places 'returns' as the first column
        reward, info = self.simulator.take_step(action=action,
                                                market_return=observation[0])
        return observation, reward, done, info

    def reset(self):
        """Resets DataSource and TradingSimulator; returns first observation"""
        self.data_source.reset()
        self.simulator.reset()
        return self.data_source.take_step()[0]

    # TODO
    def render(self, mode='human'):
        """Not implemented"""
        pass
268
269