GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/meta/env_stock_trading/env_stocktrading_stoploss.py
from __future__ import annotations

import random
import time
from copy import deepcopy

import gym
import matplotlib
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3.common import logger
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv

matplotlib.use("Agg")


class StockTradingEnvStopLoss(gym.Env):
    """
    A stock trading environment for OpenAI gym.

    This environment penalizes the model for exceeding the stop-loss threshold,
    for selling assets below the expected % profit, and for not maintaining a
    reserve of cash. This enables the model to trade with high confidence and to
    manage its cash reserves in addition to performing trading procedures.

    The reward at any step is given as follows:

        r_i = (sum(cash, asset_value) + additional_reward - total_penalty - initial_cash) / initial_cash / days_elapsed

    where
        total_penalty = cash_penalty + stop_loss_penalty + low_profit_penalty
        cash_penalty = max(0, sum(cash, asset_value) * cash_penalty_proportion - cash)
        stop_loss_penalty = -1 * dot(holdings, negative_closing_diff_avg_buy)
        low_profit_penalty = -1 * dot(holdings, negative_profit_sell_diff_avg_buy)
        additional_reward = dot(holdings, positive_profit_sell_diff_avg_buy)

    This reward function takes into account a profit/loss-ratio constraint and a
    liquidity requirement, as well as long-term accrued rewards. It also forces
    the model to trade only when it is confident enough to do so.

    Parameters:
        state space: {start_cash, <owned_shares>, for s in stocks{<stock.values>}}
        df (pandas.DataFrame): dataframe containing the market data
        buy_cost_pct (float): cost for buying shares
        sell_cost_pct (float): cost for selling shares
        hmax (int): max number of share purchases allowed per asset
        discrete_actions (bool): whether to discretize the action space
        shares_increment (int): multiple of shares that can be bought in each trade
        stoploss_penalty (float): maximum loss we can tolerate; valid values are
            between 0 and 1. If x is specified, the agent force-sells all holdings
            of an asset whenever current price < x * avg_buy_price
        profit_loss_ratio (int, float): expected profit/loss ratio; only applicable
            when stoploss_penalty < 1
        turbulence_threshold (float): maximum market turbulence allowed for purchases
            to occur; if exceeded, all positions are liquidated
        print_verbosity (int): how often (in steps) to print stats about the state of the env
        initial_amount (int, float): amount of cash initially available
        daily_information_cols (list(str)): columns used to build the state space from
            the dataframe; these can be OHLCV columns or any other variables such as
            technical indicators and the turbulence index
        cash_penalty_proportion (int, float): proportion of total assets to keep as a
            cash reserve; a penalty is applied when cash falls below this proportion
        patient (bool): whether to end the episode when we run out of cash, or simply
            stop buying until additional cash is available
        action space: <share_dollar_purchases>

    TODO:
        add holdings to memory
        move transactions to after the clip step.

    tests:
        after reset, a static strategy should result in the same metrics
        given no change in prices, no change in asset values
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        buy_cost_pct=3e-3,
        sell_cost_pct=3e-3,
        date_col_name="date",
        hmax=10,
        discrete_actions=False,
        shares_increment=1,
        stoploss_penalty=0.9,
        profit_loss_ratio=2,
        turbulence_threshold=None,
        print_verbosity=10,
        initial_amount=1e6,
        daily_information_cols=["open", "close", "high", "low", "volume"],
        cache_indicator_data=True,
        cash_penalty_proportion=0.1,
        random_start=True,
        patient=False,
        currency="$",
    ):
        self.df = df
        self.stock_col = "tic"
        self.assets = df[self.stock_col].unique()
        self.dates = df[date_col_name].sort_values().unique()
        self.random_start = random_start
        self.discrete_actions = discrete_actions
        self.patient = patient
        self.currency = currency
        self.df = self.df.set_index(date_col_name)
        self.shares_increment = shares_increment
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.print_verbosity = print_verbosity
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.stoploss_penalty = stoploss_penalty
        self.min_profit_penalty = 1 + profit_loss_ratio * (1 - self.stoploss_penalty)
        self.turbulence_threshold = turbulence_threshold
        self.daily_information_cols = daily_information_cols
        # state: cash, one holding per asset, then the daily information columns per asset
        self.state_space = (
            1 + len(self.assets) + len(self.assets) * len(self.daily_information_cols)
        )
        self.action_space = spaces.Box(low=-1, high=1, shape=(len(self.assets),))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.state_space,)
        )
        self.turbulence = 0
        self.episode = -1  # initialize so we can call reset
        self.episode_history = []
        self.printed_header = False
        self.cache_indicator_data = cache_indicator_data
        self.cached_data = None
        self.cash_penalty_proportion = cash_penalty_proportion
        if self.cache_indicator_data:
            print("caching data")
            self.cached_data = [
                self.get_date_vector(i) for i, _ in enumerate(self.dates)
            ]
            print("data cached!")

    def seed(self, seed=None):
        if seed is None:
            seed = int(round(time.time() * 1000))
        random.seed(seed)

    @property
    def current_step(self):
        return self.date_index - self.starting_point

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        self.seed()
        self.sum_trades = 0
        self.actual_num_trades = 0
        self.closing_diff_avg_buy = np.zeros(len(self.assets))
        self.profit_sell_diff_avg_buy = np.zeros(len(self.assets))
        self.n_buys = np.zeros(len(self.assets))
        self.avg_buy_price = np.zeros(len(self.assets))
        if self.random_start:
            starting_point = random.choice(range(int(len(self.dates) * 0.5)))
            self.starting_point = starting_point
        else:
            self.starting_point = 0
        self.date_index = self.starting_point
        self.turbulence = 0
        self.episode += 1
        self.actions_memory = []
        self.transaction_memory = []
        self.state_memory = []
        self.account_information = {
            "cash": [],
            "asset_value": [],
            "total_assets": [],
            "reward": [],
        }
        init_state = np.array(
            [self.initial_amount]
            + [0] * len(self.assets)
            + self.get_date_vector(self.date_index)
        )
        self.state_memory.append(init_state)
        return init_state

    def get_date_vector(self, date, cols=None):
        if (cols is None) and (self.cached_data is not None):
            return self.cached_data[date]
        else:
            date = self.dates[date]
            if cols is None:
                cols = self.daily_information_cols
            trunc_df = self.df.loc[[date]]
            v = []
            for a in self.assets:
                subset = trunc_df[trunc_df[self.stock_col] == a]
                v += subset.loc[date, cols].tolist()
            assert len(v) == len(self.assets) * len(cols)
            return v
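
    # Illustrative sketch of the observation layout (hypothetical two-asset
    # universe ["AAPL", "MSFT"] with the default daily_information_cols); the
    # state built in reset()/step() from get_date_vector() is ordered as:
    #   [cash,
    #    holdings_AAPL, holdings_MSFT,
    #    open_AAPL, close_AAPL, high_AAPL, low_AAPL, volume_AAPL,
    #    open_MSFT, close_MSFT, high_MSFT, low_MSFT, volume_MSFT]
    # i.e. 1 + len(assets) + len(assets) * len(daily_information_cols) entries,
    # matching self.state_space in __init__.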

    def return_terminal(self, reason="Last Date", reward=0):
        state = self.state_memory[-1]
        self.log_step(reason=reason, terminal_reward=reward)
        # Add outputs to logger interface
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/GainLoss_pct", (gl_pct - 1) * 100)
        logger.record(
            "environment/total_assets",
            int(self.account_information["total_assets"][-1]),
        )
        reward_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/total_reward_pct", (reward_pct - 1) * 100)
        logger.record("environment/total_trades", self.sum_trades)
        logger.record(
            "environment/actual_num_trades",
            self.actual_num_trades,
        )
        logger.record(
            "environment/avg_daily_trades",
            self.sum_trades / (self.current_step),
        )
        logger.record(
            "environment/avg_daily_trades_per_asset",
            self.sum_trades / (self.current_step) / len(self.assets),
        )
        logger.record("environment/completed_steps", self.current_step)
        logger.record(
            "environment/sum_rewards", np.sum(self.account_information["reward"])
        )
        logger.record(
            "environment/cash_proportion",
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1],
        )
        return state, reward, True, {}

    def log_step(self, reason, terminal_reward=None):
        if terminal_reward is None:
            terminal_reward = self.account_information["reward"][-1]
        cash_pct = (
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1]
        )
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        rec = [
            self.episode,
            self.date_index - self.starting_point,
            reason,
            f"{self.currency}{'{:0,.0f}'.format(float(self.account_information['cash'][-1]))}",
            f"{self.currency}{'{:0,.0f}'.format(float(self.account_information['total_assets'][-1]))}",
            f"{terminal_reward*100:0.5f}%",
            f"{(gl_pct - 1)*100:0.5f}%",
            f"{cash_pct*100:0.2f}%",
        ]
        self.episode_history.append(rec)
        print(self.template.format(*rec))

    def log_header(self):
        # column widths: 4, 4, 15, 15, 15, 10, 10, 10
        self.template = "{0:4}|{1:4}|{2:15}|{3:15}|{4:15}|{5:10}|{6:10}|{7:10}"
        print(
            self.template.format(
                "EPISODE",
                "STEPS",
                "TERMINAL_REASON",
                "CASH",
                "TOT_ASSETS",
                "TERMINAL_REWARD_unsc",
                "GAINLOSS_PCT",
                "CASH_PROPORTION",
            )
        )
        self.printed_header = True

    def get_reward(self):
        if self.current_step == 0:
            return 0
        else:
            total_assets = self.account_information["total_assets"][-1]
            cash = self.account_information["cash"][-1]
            holdings = self.state_memory[-1][1 : len(self.assets) + 1]
            neg_closing_diff_avg_buy = np.clip(self.closing_diff_avg_buy, -np.inf, 0)
            neg_profit_sell_diff_avg_buy = np.clip(
                self.profit_sell_diff_avg_buy, -np.inf, 0
            )
            pos_profit_sell_diff_avg_buy = np.clip(
                self.profit_sell_diff_avg_buy, 0, np.inf
            )

            cash_penalty = max(0, (total_assets * self.cash_penalty_proportion - cash))
            if self.current_step > 1:
                prev_holdings = self.state_memory[-2][1 : len(self.assets) + 1]
                stop_loss_penalty = -1 * np.dot(
                    np.array(prev_holdings), neg_closing_diff_avg_buy
                )
            else:
                stop_loss_penalty = 0
            low_profit_penalty = -1 * np.dot(
                np.array(holdings), neg_profit_sell_diff_avg_buy
            )
            total_penalty = cash_penalty + stop_loss_penalty + low_profit_penalty

            additional_reward = np.dot(np.array(holdings), pos_profit_sell_diff_avg_buy)

            reward = (
                (total_assets - total_penalty + additional_reward) / self.initial_amount
            ) - 1
            reward /= self.current_step

            return reward

    def step(self, actions):
        # let's just log what we're doing in terms of max actions at each step.
        self.sum_trades += np.sum(np.abs(actions))
        # print header only the first time
        if self.printed_header is False:
            self.log_header()
        # print if it's time.
        if (self.current_step + 1) % self.print_verbosity == 0:
            self.log_step(reason="update")
        # if we're at the end
        if self.date_index == len(self.dates) - 1:
            # if we hit the end, set reward to total gains (or losses)
            return self.return_terminal(reward=self.get_reward())
        else:
            # compute value of cash + assets
            begin_cash = self.state_memory[-1][0]
            holdings = self.state_memory[-1][1 : len(self.assets) + 1]
            assert min(holdings) >= 0
            closings = np.array(self.get_date_vector(self.date_index, cols=["close"]))
            asset_value = np.dot(holdings, closings)
            # reward is (cash + assets) - (cash_last_step + assets_last_step)
            reward = self.get_reward()
            # log the values of cash, assets, and total assets
            self.account_information["cash"].append(begin_cash)
            self.account_information["asset_value"].append(asset_value)
            self.account_information["total_assets"].append(begin_cash + asset_value)
            self.account_information["reward"].append(reward)

            # multiply action values by our scalar multiplier and save
            actions = actions * self.hmax
            self.actions_memory.append(
                actions * closings
            )  # capture what the model is trying to do
            # buy/sell only if the price is > 0 (no missing data on this particular date)
            actions = np.where(closings > 0, actions, 0)
            if self.turbulence_threshold is not None:
                # if turbulence goes over the threshold, just clear out all positions
                if self.turbulence >= self.turbulence_threshold:
                    actions = -(np.array(holdings) * closings)
                    self.log_step(reason="TURBULENCE")
            # scale cash purchases to asset
            if self.discrete_actions:
                # convert to integers because we can't buy fractions of shares
                actions = np.where(closings > 0, actions // closings, 0)
                actions = actions.astype(int)
                # round down actions to the nearest multiple of shares_increment
                actions = np.where(
                    actions >= 0,
                    (actions // self.shares_increment) * self.shares_increment,
                    ((actions + self.shares_increment) // self.shares_increment)
                    * self.shares_increment,
                )
            else:
                actions = np.where(closings > 0, actions / closings, 0)

            # clip actions so we can't sell more assets than we hold
            actions = np.maximum(actions, -np.array(holdings))

            self.closing_diff_avg_buy = closings - (
                self.stoploss_penalty * self.avg_buy_price
            )
            if begin_cash >= self.stoploss_penalty * self.initial_amount:
                # clear out position if the stop-loss criterion is met
                actions = np.where(
                    self.closing_diff_avg_buy < 0, -np.array(holdings), actions
                )

                if any(np.clip(self.closing_diff_avg_buy, -np.inf, 0) < 0):
                    self.log_step(reason="STOP LOSS")

            # compute our proceeds from sells, and add to cash
            sells = -np.clip(actions, -np.inf, 0)
            proceeds = np.dot(sells, closings)
            costs = proceeds * self.sell_cost_pct
            coh = begin_cash + proceeds
            # compute the cost of our buys
            buys = np.clip(actions, 0, np.inf)
            spend = np.dot(buys, closings)
            costs += spend * self.buy_cost_pct
            # if we run out of cash...
            if (spend + costs) > coh:
                if self.patient:
                    # ... just don't buy anything until additional cash comes in
                    self.log_step(reason="CASH SHORTAGE")
                    actions = np.where(actions > 0, 0, actions)
                    spend = 0
                    costs = 0
                else:
                    # ... end the cycle and penalize
                    return self.return_terminal(
                        reason="CASH SHORTAGE", reward=self.get_reward()
                    )

            self.transaction_memory.append(actions)  # capture what the model could do

            # get profitable sell actions
            sell_closing_price = np.where(
                sells > 0, closings, 0
            )  # get the closing price of assets that we sold
            profit_sell = np.where(
                sell_closing_price - self.avg_buy_price > 0, 1, 0
            )  # mark the ones which are profitable

            self.profit_sell_diff_avg_buy = np.where(
                profit_sell == 1,
                closings - (self.min_profit_penalty * self.avg_buy_price),
                0,
            )

            if any(np.clip(self.profit_sell_diff_avg_buy, -np.inf, 0) < 0):
                self.log_step(reason="LOW PROFIT")
            else:
                if any(np.clip(self.profit_sell_diff_avg_buy, 0, np.inf) > 0):
                    self.log_step(reason="HIGH PROFIT")

            # verify we didn't do anything impossible here
            assert (spend + costs) <= coh

            # log the actual total number of trades we did up to the current step
            self.actual_num_trades = np.sum(np.abs(np.sign(actions)))

            # update our holdings
            coh = coh - spend - costs
            holdings_updated = holdings + actions

            # update the average buy price (incremental average)
            buys = np.sign(buys)
            self.n_buys += buys
            self.avg_buy_price = np.where(
                buys > 0,
                self.avg_buy_price + ((closings - self.avg_buy_price) / self.n_buys),
                self.avg_buy_price,
            )

            # reset to zero when we don't have any holdings anymore
            self.n_buys = np.where(holdings_updated > 0, self.n_buys, 0)
            self.avg_buy_price = np.where(holdings_updated > 0, self.avg_buy_price, 0)

            self.date_index += 1
            if self.turbulence_threshold is not None:
                self.turbulence = self.get_date_vector(
                    self.date_index, cols=["turbulence"]
                )[0]

            # update state
            state = (
                [coh] + list(holdings_updated) + self.get_date_vector(self.date_index)
            )
            self.state_memory.append(state)

            return state, reward, False, {}

    def get_sb_env(self):
        def get_self():
            return deepcopy(self)

        e = DummyVecEnv([get_self])
        obs = e.reset()
        return e, obs

    def get_multiproc_env(self, n=10):
        def get_self():
            return deepcopy(self)

        e = SubprocVecEnv([get_self for _ in range(n)], start_method="fork")
        obs = e.reset()
        return e, obs

    def save_asset_memory(self):
        if self.current_step == 0:
            return None
        else:
            self.account_information["date"] = self.dates[
                -len(self.account_information["cash"]) :
            ]
            return pd.DataFrame(self.account_information)

    def save_action_memory(self):
        if self.current_step == 0:
            return None
        else:
            return pd.DataFrame(
                {
                    "date": self.dates[-len(self.account_information["cash"]) :],
                    "actions": self.actions_memory,
                    "transactions": self.transaction_memory,
                }
            )
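

if __name__ == "__main__":
    # Minimal usage sketch, assuming a FinRL-style dataframe saved as
    # "your_data.csv" (hypothetical path) with "date" and "tic" columns plus the
    # default daily information columns, and using stable_baselines3 PPO purely
    # as an example algorithm.
    from stable_baselines3 import PPO

    df = pd.read_csv("your_data.csv")
    env = StockTradingEnvStopLoss(
        df,
        initial_amount=1e6,
        discrete_actions=True,
        patient=True,
    )
    # wrap the environment for stable-baselines3 and train briefly
    vec_env, _ = env.get_sb_env()
    model = PPO("MlpPolicy", vec_env, verbose=1)
    model.learn(total_timesteps=10_000)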