GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/meta/env_stock_trading/env_stocktrading_cashpenalty.py
from __future__ import annotations

import random
import time
from copy import deepcopy

import gym
import matplotlib
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3.common import logger
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv

matplotlib.use("Agg")


class StockTradingEnvCashpenalty(gym.Env):
    """
    A stock trading environment for OpenAI gym.
    This environment penalizes the model for not maintaining a reserve of cash.
    This enables the model to manage cash reserves in addition to performing trading procedures.
    Reward at any step is given as follows:

        r_t = ((cash + asset_value
                - max(0, (cash + asset_value) * cash_penalty_proportion - cash))
               / initial_amount - 1) / days_elapsed

    This reward function takes into account a liquidity requirement as well as long-term accrued rewards.

    Parameters:
        df (pandas.DataFrame): dataframe containing the data
        buy_cost_pct (float): cost of buying shares, as a fraction of trade value
        sell_cost_pct (float): cost of selling shares, as a fraction of trade value
        date_col_name (str): name of the date column in df
        hmax (int, array): maximum cash to be traded in each trade per asset. If an array is provided, each index corresponds to one asset
        discrete_actions (bool): whether to discretize the action space so that only whole shares are traded
        shares_increment (int): multiple of shares that can be traded in each transaction. Only applicable if discrete_actions=True
        turbulence_threshold (float): maximum market turbulence under which purchases may occur. If exceeded, all positions are liquidated
        print_verbosity (int): how often (in steps) to print stats about the state of the env
        initial_amount (int, float): amount of cash initially available
        daily_information_cols (list(str)): columns to use when building the state space from the dataframe. These can be OHLCV columns or any other variables such as technical indicators and the turbulence index
        cache_indicator_data (bool): whether to precompute all date vectors at construction time
        cash_penalty_proportion (int, float): fraction of total assets that must be held as cash to avoid the penalty
        random_start (bool): whether each episode starts at a random date in the first half of the data
        patient (bool): whether to keep the episode running on a cash shortage (skipping buys until additional cash is available) instead of terminating it
        currency (str): currency symbol used when printing stats

    RL Inputs and Outputs:
        action space: [<n_assets>,] in range [-1, 1]
        state space: {start_cash, [shares_i for i in assets], [[indicator_j for j in indicators] for i in assets]}

    TODO:
        Organize functions
        Write README
        Document tests
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        buy_cost_pct=3e-3,
        sell_cost_pct=3e-3,
        date_col_name="date",
        hmax=10,
        discrete_actions=False,
        shares_increment=1,
        turbulence_threshold=None,
        print_verbosity=10,
        initial_amount=1e6,
        daily_information_cols=["open", "close", "high", "low", "volume"],
        cache_indicator_data=True,
        cash_penalty_proportion=0.1,
        random_start=True,
        patient=False,
        currency="$",
    ):
        self.df = df
        self.stock_col = "tic"
        self.assets = df[self.stock_col].unique()
        self.dates = df[date_col_name].sort_values().unique()
        self.random_start = random_start
        self.discrete_actions = discrete_actions
        self.patient = patient
        self.currency = currency
        self.df = self.df.set_index(date_col_name)
        self.shares_increment = shares_increment
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.print_verbosity = print_verbosity
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.turbulence_threshold = turbulence_threshold
        self.daily_information_cols = daily_information_cols
        self.state_space = (
            1 + len(self.assets) + len(self.assets) * len(self.daily_information_cols)
        )
        self.action_space = spaces.Box(low=-1, high=1, shape=(len(self.assets),))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.state_space,)
        )
        self.turbulence = 0
        self.episode = -1  # initialize so we can call reset
        self.episode_history = []
        self.printed_header = False
        self.cache_indicator_data = cache_indicator_data
        self.cached_data = None
        self.cash_penalty_proportion = cash_penalty_proportion
        if self.cache_indicator_data:
            print("caching data")
            self.cached_data = [
                self.get_date_vector(i) for i, _ in enumerate(self.dates)
            ]
            print("data cached!")

    def seed(self, seed=None):
        if seed is None:
            seed = int(round(time.time() * 1000))
        random.seed(seed)

    @property
    def current_step(self):
        return self.date_index - self.starting_point

    @property
    def cash_on_hand(self):
        # amount of cash held at current timestep
        return self.state_memory[-1][0]

    @property
    def holdings(self):
        # quantity of shares held at current timestep
        return self.state_memory[-1][1 : len(self.assets) + 1]

    @property
    def closings(self):
        return np.array(self.get_date_vector(self.date_index, cols=["close"]))

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        # forward the seed so seeded resets are reproducible
        self.seed(seed)
        self.sum_trades = 0
        if self.random_start:
            starting_point = random.choice(range(int(len(self.dates) * 0.5)))
            self.starting_point = starting_point
        else:
            self.starting_point = 0
        self.date_index = self.starting_point
        self.turbulence = 0
        self.episode += 1
        self.actions_memory = []
        self.transaction_memory = []
        self.state_memory = []
        self.account_information = {
            "cash": [],
            "asset_value": [],
            "total_assets": [],
            "reward": [],
        }
        init_state = np.array(
            [self.initial_amount]
            + [0] * len(self.assets)
            + self.get_date_vector(self.date_index)
        )
        self.state_memory.append(init_state)
        return init_state

    def get_date_vector(self, date, cols=None):
        if (cols is None) and (self.cached_data is not None):
            return self.cached_data[date]
        else:
            date = self.dates[date]
            if cols is None:
                cols = self.daily_information_cols
            trunc_df = self.df.loc[[date]]
            v = []
            for a in self.assets:
                subset = trunc_df[trunc_df[self.stock_col] == a]
                v += subset.loc[date, cols].tolist()
            assert len(v) == len(self.assets) * len(cols)
            return v

    def return_terminal(self, reason="Last Date", reward=0):
        state = self.state_memory[-1]
        self.log_step(reason=reason, terminal_reward=reward)
        # Add outputs to logger interface
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/GainLoss_pct", (gl_pct - 1) * 100)
        logger.record(
            "environment/total_assets",
            int(self.account_information["total_assets"][-1]),
        )
        logger.record("environment/total_reward_pct", (gl_pct - 1) * 100)
        logger.record("environment/total_trades", self.sum_trades)
        logger.record(
            "environment/avg_daily_trades",
            self.sum_trades / (self.current_step),
        )
        logger.record(
            "environment/avg_daily_trades_per_asset",
            self.sum_trades / (self.current_step) / len(self.assets),
        )
        logger.record("environment/completed_steps", self.current_step)
        logger.record(
            "environment/sum_rewards", np.sum(self.account_information["reward"])
        )
        logger.record(
            "environment/cash_proportion",
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1],
        )
        return state, reward, True, {}

    def log_step(self, reason, terminal_reward=None):
        if terminal_reward is None:
            terminal_reward = self.account_information["reward"][-1]
        cash_pct = (
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1]
        )
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        rec = [
            self.episode,
            self.date_index - self.starting_point,
            reason,
            f"{self.currency}{float(self.account_information['cash'][-1]):0,.0f}",
            f"{self.currency}{float(self.account_information['total_assets'][-1]):0,.0f}",
            f"{terminal_reward * 100:0.5f}%",
            f"{(gl_pct - 1) * 100:0.5f}%",
            f"{cash_pct * 100:0.2f}%",
        ]
        self.episode_history.append(rec)
        print(self.template.format(*rec))

    def log_header(self):
        if self.printed_header is False:
            self.template = "{0:4}|{1:4}|{2:15}|{3:15}|{4:15}|{5:10}|{6:10}|{7:10}"  # column widths: 4, 4, 15, 15, 15, 10, 10, 10
            print(
                self.template.format(
                    "EPISODE",
                    "STEPS",
                    "TERMINAL_REASON",
                    "CASH",
                    "TOT_ASSETS",
                    "TERMINAL_REWARD_unsc",
                    "GAINLOSS_PCT",
                    "CASH_PROPORTION",
                )
            )
            self.printed_header = True

    def get_reward(self):
        if self.current_step == 0:
            return 0
        else:
            assets = self.account_information["total_assets"][-1]
            cash = self.account_information["cash"][-1]
            cash_penalty = max(0, (assets * self.cash_penalty_proportion - cash))
            assets -= cash_penalty
            reward = (assets / self.initial_amount) - 1
            reward /= self.current_step
            return reward
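
    # Worked example with illustrative numbers (not from the repo): with
    # initial_amount=1_000_000, cash_penalty_proportion=0.1, and, after 10 steps,
    # cash=50_000 and asset_value=1_050_000:
    #   total_assets = 50_000 + 1_050_000 = 1_100_000
    #   cash_penalty = max(0, 1_100_000 * 0.1 - 50_000) = 60_000
    #   reward = ((1_100_000 - 60_000) / 1_000_000 - 1) / 10 = 0.004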

    def get_transactions(self, actions):
        """
        This function takes in a raw 'action' from the model and converts it into realistic transactions.
        It includes logic for discretizing actions and for handling turbulence.
        """
        # record actions of the model
        self.actions_memory.append(actions)

        # multiply actions by the hmax value
        actions = actions * self.hmax

        # do nothing for assets with a zero closing price
        actions = np.where(self.closings > 0, actions, 0)

        # discretize optionally
        if self.discrete_actions:
            # convert to integers because we can't buy fractions of shares
            actions = actions // self.closings
            actions = actions.astype(int)
            # round actions toward zero to the nearest multiple of shares_increment
            actions = np.where(
                actions >= 0,
                (actions // self.shares_increment) * self.shares_increment,
                ((actions + self.shares_increment) // self.shares_increment)
                * self.shares_increment,
            )
        else:
            actions = actions / self.closings

        # can't sell more than we have
        actions = np.maximum(actions, -np.array(self.holdings))

        # deal with turbulence
        if self.turbulence_threshold is not None:
            # if turbulence goes over the threshold, just clear out all positions
            if self.turbulence >= self.turbulence_threshold:
                actions = -(np.array(self.holdings))
                self.log_step(reason="TURBULENCE")

        return actions
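
    # Worked example with illustrative numbers (not from the repo): with
    # hmax=100, discrete_actions=True, shares_increment=10, one asset closing
    # at 3.0, and a raw action of 0.65:
    #   0.65 * 100 = 65 currency units -> 65 // 3.0 = 21 shares
    #   rounded toward zero to a multiple of 10 -> buy 20 shares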

    def step(self, actions):
        # track the magnitude of raw actions for logging
        self.sum_trades += np.sum(np.abs(actions))
        self.log_header()
        # print if it's time
        if (self.current_step + 1) % self.print_verbosity == 0:
            self.log_step(reason="update")
        # if we're at the end
        if self.date_index == len(self.dates) - 1:
            # if we hit the end, set reward to total gains (or losses)
            return self.return_terminal(reward=self.get_reward())
        else:
            """
            First, we need to compute values of holdings, save these, and log everything.
            Then we can reward our model for its earnings.
            """
            # compute value of cash + assets
            begin_cash = self.cash_on_hand
            assert min(self.holdings) >= 0
            asset_value = np.dot(self.holdings, self.closings)
            # log the values of cash, assets, and total assets
            self.account_information["cash"].append(begin_cash)
            self.account_information["asset_value"].append(asset_value)
            self.account_information["total_assets"].append(begin_cash + asset_value)

            # compute reward once we've computed the value of things!
            reward = self.get_reward()
            self.account_information["reward"].append(reward)

            # now, let's get down to the business at hand
            transactions = self.get_transactions(actions)

            # compute our proceeds from sells, and add to cash
            sells = -np.clip(transactions, -np.inf, 0)
            proceeds = np.dot(sells, self.closings)
            costs = proceeds * self.sell_cost_pct
            coh = begin_cash + proceeds
            # compute the cost of our buys
            buys = np.clip(transactions, 0, np.inf)
            spend = np.dot(buys, self.closings)
            costs += spend * self.buy_cost_pct
            # if we run out of cash...
            if (spend + costs) > coh:
                if self.patient:
                    # ... just don't buy anything until we get additional cash
                    self.log_step(reason="CASH SHORTAGE")
                    transactions = np.where(transactions > 0, 0, transactions)
                    spend = 0
                    costs = 0
                else:
                    # ... end the episode and penalize
                    return self.return_terminal(
                        reason="CASH SHORTAGE", reward=self.get_reward()
                    )
            self.transaction_memory.append(
                transactions
            )  # record the transactions the model actually made
            # verify we didn't do anything impossible here
            assert (spend + costs) <= coh
            # update our holdings
            coh = coh - spend - costs
            holdings_updated = self.holdings + transactions
            self.date_index += 1
            if self.turbulence_threshold is not None:
                self.turbulence = self.get_date_vector(
                    self.date_index, cols=["turbulence"]
                )[0]
            # update state
            state = (
                [coh] + list(holdings_updated) + self.get_date_vector(self.date_index)
            )
            self.state_memory.append(state)
            return state, reward, False, {}
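
    # Worked example of the cash flow above (illustrative numbers, not from the
    # repo): with buy_cost_pct=sell_cost_pct=3e-3, begin_cash=1_000, and
    # transactions=[-10, +5] at closings=[20.0, 30.0]:
    #   proceeds = 10 * 20.0 = 200.0, sell costs = 0.6
    #   spend = 5 * 30.0 = 150.0, buy costs = 0.45
    #   coh = 1_000 + 200.0 - 150.0 - 0.6 - 0.45 = 1_048.95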

    def get_sb_env(self):
        def get_self():
            return deepcopy(self)

        e = DummyVecEnv([get_self])
        obs = e.reset()
        return e, obs

    def get_multiproc_env(self, n=10):
        def get_self():
            return deepcopy(self)

        e = SubprocVecEnv([get_self for _ in range(n)], start_method="fork")
        obs = e.reset()
        return e, obs

    def save_asset_memory(self):
        if self.current_step == 0:
            return None
        else:
            # align dates with the episode's actual window, which starts at starting_point
            self.account_information["date"] = self.dates[
                self.starting_point : self.starting_point
                + len(self.account_information["cash"])
            ]
            return pd.DataFrame(self.account_information)

    def save_action_memory(self):
        if self.current_step == 0:
            return None
        else:
            return pd.DataFrame(
                {
                    "date": self.dates[
                        self.starting_point : self.starting_point
                        + len(self.account_information["cash"])
                    ],
                    "actions": self.actions_memory,
                    "transactions": self.transaction_memory,
                }
            )
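

# Minimal usage sketch (an assumption for illustration, not part of the FinRL
# repo): build a toy long-format dataframe, step the env with random actions,
# then wrap it for Stable-Baselines3. Tickers, dates, and prices are made up.
if __name__ == "__main__":
    dates = pd.date_range("2020-01-01", periods=30).astype(str)
    toy_df = pd.DataFrame(
        {
            "date": list(dates) * 2,
            "tic": ["AAPL"] * 30 + ["MSFT"] * 30,
            "open": np.random.uniform(90, 110, 60),
            "close": np.random.uniform(90, 110, 60),
            "high": np.random.uniform(110, 120, 60),
            "low": np.random.uniform(80, 90, 60),
            "volume": np.random.uniform(1e6, 2e6, 60),
        }
    )
    env = StockTradingEnvCashpenalty(
        df=toy_df, initial_amount=1e4, random_start=False
    )
    state = env.reset()
    for _ in range(5):
        # sample a raw action in [-1, 1] per asset, as the model would
        action = np.random.uniform(-1, 1, size=len(env.assets))
        state, reward, done, info = env.step(action)
        if done:
            break
    # vectorized wrapper for training with Stable-Baselines3
    vec_env, obs = env.get_sb_env()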