GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/meta/env_portfolio_allocation/env_portfolio.py

from __future__ import annotations

import gymnasium as gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gymnasium import spaces
from gymnasium.utils import seeding
from stable_baselines3.common.vec_env import DummyVecEnv

# non-interactive backend so figures can be saved without a display
matplotlib.use("Agg")

class StockPortfolioEnv(gym.Env):
    """A portfolio allocation environment for Gymnasium.

    Attributes
    ----------
    df : DataFrame
        input data
    stock_dim : int
        number of unique stocks
    hmax : int
        maximum number of shares to trade
    initial_amount : int
        starting cash
    transaction_cost_pct : float
        transaction cost percentage per trade
    reward_scaling : float
        scaling factor for the reward, useful for training
    state_space : int
        the dimension of the input features
    action_space : int
        equals the stock dimension
    tech_indicator_list : list
        a list of technical indicator names
    turbulence_threshold : int
        a threshold to control risk aversion
    day : int
        an increment number to control the date

    Methods
    -------
    step()
        at each step the agent submits actions (portfolio weights); the
        environment calculates the reward and returns the next observation
    reset()
        reset the environment
    render()
        return the current state
    save_asset_memory()
        return the portfolio return at each time step
    save_action_memory()
        return the actions/weights at each time step
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        stock_dim,
        hmax,
        initial_amount,
        transaction_cost_pct,
        reward_scaling,
        state_space,
        action_space,
        tech_indicator_list,
        turbulence_threshold=None,
        lookback=252,
        day=0,
    ):
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list

        # action space: one weight per stock, shape (self.stock_dim,)
        self.action_space = spaces.Box(low=0, high=1, shape=(self.action_space,))
        # observation: the covariance matrix stacked on top of one row per
        # technical indicator, e.g. (30 + 4, 30) = (34, 30) for 30 stocks
        # and 4 indicators
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.state_space + len(self.tech_indicator_list), self.state_space),
        )

        # load data from a pandas dataframe
        self.data = self.df.loc[self.day, :]
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.terminal = False
        self.turbulence_threshold = turbulence_threshold
        # initialize the portfolio value
        self.portfolio_value = self.initial_amount

        # memorize the portfolio value at each step
        self.asset_memory = [self.initial_amount]
        # memorize the portfolio return at each step
        self.portfolio_return_memory = [0]
        # start with equal weights
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]
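
    # Illustrative sketch of the expected `df` layout (an assumption, not
    # part of the original source): the index is the day number, with one
    # row per ticker and the per-day covariance matrix repeated in
    # `cov_list`, e.g.
    #
    #        date        tic   close   macd  ...  cov_list
    #   0    2020-01-02  AAPL   75.1   0.12  ...  [[...]]
    #   0    2020-01-02  MSFT  160.6   0.08  ...  [[...]]
    #   1    2020-01-03  AAPL   74.4   0.10  ...  [[...]]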

    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique()) - 1

        if self.terminal:
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ["daily_return"]
            plt.plot(df.daily_return.cumsum(), "r")
            plt.savefig("results/cumulative_reward.png")
            plt.close()

            plt.plot(self.portfolio_return_memory, "r")
            plt.savefig("results/rewards.png")
            plt.close()

            print("=================================")
            print(f"begin_total_asset:{self.asset_memory[0]}")
            print(f"end_total_asset:{self.portfolio_value}")

            df_daily_return = pd.DataFrame(self.portfolio_return_memory)
            df_daily_return.columns = ["daily_return"]
            if df_daily_return["daily_return"].std() != 0:
                # annualized Sharpe ratio, assuming 252 trading days and a
                # zero risk-free rate
                sharpe = (
                    (252**0.5)
                    * df_daily_return["daily_return"].mean()
                    / df_daily_return["daily_return"].std()
                )
                print("Sharpe: ", sharpe)
            print("=================================")

            return self.state, self.reward, self.terminal, False, {}

        else:
            # actions are the raw portfolio weights; normalize them with
            # softmax so they are positive and sum to 1
            weights = self.softmax_normalization(actions)
            self.actions_memory.append(weights)
            last_day_memory = self.data

            # load the next state
            self.day += 1
            self.data = self.df.loc[self.day, :]
            self.covs = self.data["cov_list"].values[0]
            self.state = np.append(
                np.array(self.covs),
                [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                axis=0,
            )
            # calculate the portfolio return:
            # sum of individual stock returns weighted by the portfolio weights
            portfolio_return = sum(
                ((self.data.close.values / last_day_memory.close.values) - 1) * weights
            )
            # update the portfolio value
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
            self.portfolio_value = new_portfolio_value

            # save into memory
            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data.date.unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # the reward is the new portfolio value; reward scaling is left
            # disabled here
            self.reward = new_portfolio_value
            # self.reward = self.reward * self.reward_scaling

            return self.state, self.reward, self.terminal, False, {}
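
    # Worked example of the step arithmetic above (illustrative, not part
    # of the original source): with two stocks whose closes move
    # 100 -> 102 and 50 -> 49, and weights [0.6, 0.4], the step return is
    #   0.6 * (102/100 - 1) + 0.4 * (49/50 - 1) = 0.012 - 0.008 = 0.004
    # so a 1,000,000 portfolio becomes 1,004,000.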

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        self.asset_memory = [self.initial_amount]
        self.day = 0
        self.data = self.df.loc[self.day, :]
        # load the initial state
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.portfolio_value = self.initial_amount
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]
        return self.state, {}

    def render(self, mode="human"):
        return self.state

    def softmax_normalization(self, actions):
        # map raw actions to positive weights that sum to 1
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        softmax_output = numerator / denominator
        return softmax_output
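
    # A numerically stable variant (a sketch, not in the original source):
    # subtracting the max before exponentiating avoids overflow for large
    # action values and yields the same weights.
    #
    # def softmax_normalization(self, actions):
    #     shifted = np.array(actions) - np.max(actions)
    #     e = np.exp(shifted)
    #     return e / e.sum()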

    def save_asset_memory(self):
        # return the per-step portfolio returns indexed by date
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        df_account_value = pd.DataFrame(
            {"date": date_list, "daily_return": portfolio_return}
        )
        return df_account_value

    def save_action_memory(self):
        # the date and close price length must match the actions length
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ["date"]

        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
264
def get_sb_env(self):
265
e = DummyVecEnv([lambda: self])
266
obs = e.reset()
267
return e, obs
268
269
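
# Usage sketch (illustrative, not part of the original source). It assumes
# a preprocessed DataFrame `processed_df` shaped as described in the
# comment above __init__'s data loading, covering 30 tickers with the
# `macd` and `rsi_30` indicator columns; the hyperparameter values are
# placeholders, not recommendations.
#
# env = StockPortfolioEnv(
#     df=processed_df,
#     stock_dim=30,
#     hmax=100,
#     initial_amount=1_000_000,
#     transaction_cost_pct=0.001,
#     reward_scaling=1e-4,
#     state_space=30,
#     action_space=30,
#     tech_indicator_list=["macd", "rsi_30"],
# )
# env_train, obs = env.get_sb_env()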