Path: blob/master/finrl/meta/env_portfolio_allocation/env_portfolio.py
from __future__ import annotations

import gymnasium as gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gymnasium import spaces
from gymnasium.utils import seeding
from stable_baselines3.common.vec_env import DummyVecEnv

matplotlib.use("Agg")


class StockPortfolioEnv(gym.Env):
    """A portfolio allocation environment for Gymnasium.

    Attributes
    ----------
    df: DataFrame
        input data
    stock_dim : int
        number of unique stocks
    hmax : int
        maximum number of shares to trade
    initial_amount : int
        starting cash
    transaction_cost_pct: float
        transaction cost percentage per trade
    reward_scaling: float
        scaling factor for the reward, useful for training
    state_space: int
        the dimension of input features
    action_space: int
        equals stock dimension
    tech_indicator_list: list
        a list of technical indicator names
    turbulence_threshold: int
        a threshold to control risk aversion
    day: int
        an incrementing index that tracks the current date

    Methods
    -------
    step()
        at each step the agent will return actions, then
        we will calculate the reward, and return the next observation.
    reset()
        reset the environment
    render()
        return the current state
    softmax_normalization()
        convert raw actions to portfolio weights via softmax
    save_asset_memory()
        return account value at each time step
    save_action_memory()
        return actions/positions at each time step

    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        stock_dim,
        hmax,
        initial_amount,
        transaction_cost_pct,
        reward_scaling,
        state_space,
        action_space,
        tech_indicator_list,
        turbulence_threshold=None,
        lookback=252,
        day=0,
    ):
        # super(StockEnv, self).__init__()
        # money = 10 , scope = 1
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        # note: transaction costs are stored but never applied in this environment
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list

        # action_space normalization; shape is self.stock_dim
        self.action_space = spaces.Box(low=0, high=1, shape=(self.action_space,))
        # covariance matrix + technical indicators,
        # e.g. Shape = (34, 30) for 30 stocks and 4 technical indicators
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.state_space + len(self.tech_indicator_list), self.state_space),
        )

        # load data from a pandas dataframe
        self.data = self.df.loc[self.day, :]
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.terminal = False
        self.turbulence_threshold = turbulence_threshold
        # initialize state: initial portfolio return + individual stock return + individual weights
        self.portfolio_value = self.initial_amount

        # memorize portfolio value each step
        self.asset_memory = [self.initial_amount]
        # memorize portfolio return each step
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]

    def step(self, actions):
        # print(self.day)
        self.terminal = self.day >= len(self.df.index.unique()) - 1
        # print(actions)

        if self.terminal:
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ["daily_return"]
            plt.plot(df.daily_return.cumsum(), "r")
            plt.savefig("results/cumulative_reward.png")
            plt.close()

            plt.plot(self.portfolio_return_memory, "r")
            plt.savefig("results/rewards.png")
            plt.close()

            print("=================================")
            print(f"begin_total_asset:{self.asset_memory[0]}")
            print(f"end_total_asset:{self.portfolio_value}")

            df_daily_return = pd.DataFrame(self.portfolio_return_memory)
            df_daily_return.columns = ["daily_return"]
            if df_daily_return["daily_return"].std() != 0:
                sharpe = (
                    (252**0.5)
                    * df_daily_return["daily_return"].mean()
                    / df_daily_return["daily_return"].std()
                )
                print("Sharpe: ", sharpe)
            print("=================================")

            return self.state, self.reward, self.terminal, False, {}

        else:
            # print("Model actions: ", actions)
            # actions are the portfolio weights; normalize them to sum to 1
            # (legacy min-max normalization kept for reference):
            # if (np.array(actions) - np.array(actions).min()).sum() != 0:
            #     norm_actions = (np.array(actions) - np.array(actions).min()) / (
            #         np.array(actions) - np.array(actions).min()
            #     ).sum()
            # else:
            #     norm_actions = actions
            weights = self.softmax_normalization(actions)
            # print("Normalized actions: ", weights)
            self.actions_memory.append(weights)
            last_day_memory = self.data

            # load next state
            self.day += 1
            self.data = self.df.loc[self.day, :]
            self.covs = self.data["cov_list"].values[0]
            self.state = np.append(
                np.array(self.covs),
                [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                axis=0,
            )
            # print(self.state)
            # calculate portfolio return:
            # individual stocks' return * weight
            portfolio_return = sum(
                ((self.data.close.values / last_day_memory.close.values) - 1) * weights
            )
            # update portfolio value
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
            self.portfolio_value = new_portfolio_value

            # save into memory
            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data.date.unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # the reward is the new portfolio value (the end portfolio value at the last step)
            self.reward = new_portfolio_value
            # print("Step reward: ", self.reward)
            # self.reward = self.reward * self.reward_scaling

            return self.state, self.reward, self.terminal, False, {}

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        # seed the underlying RNG per the Gymnasium API
        super().reset(seed=seed)
        self.asset_memory = [self.initial_amount]
        self.day = 0
        self.data = self.df.loc[self.day, :]
        # load states
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.portfolio_value = self.initial_amount
        # self.cost = 0
        # self.trades = 0
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]
        return self.state, {}

    def render(self, mode="human"):
        return self.state

    def softmax_normalization(self, actions):
        # map raw actions to non-negative portfolio weights that sum to 1
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        softmax_output = numerator / denominator
        return softmax_output

    def save_asset_memory(self):
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        # print(len(date_list))
        # print(len(asset_list))
        df_account_value = pd.DataFrame(
            {"date": date_list, "daily_return": portfolio_return}
        )
        return df_account_value

    def save_action_memory(self):
        # date and close price length must match actions length
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ["date"]

        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        # df_actions = pd.DataFrame({'date': date_list, 'actions': action_list})
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs
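
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module). A minimal, self-contained
# example of rolling one episode with random actions, under assumed inputs:
# the synthetic DataFrame below mimics the preprocessed FinRL format (integer
# day index; `date`, `tic`, `close`, a per-day `cov_list` covariance matrix,
# and indicator columns). The `macd` column name is a placeholder for whatever
# technical indicators you actually computed.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import os

    os.makedirs("results", exist_ok=True)  # step() saves plots here at episode end

    # build a tiny synthetic dataset: 3 tickers over 5 trading days
    n_days, tics = 5, ["AAA", "BBB", "CCC"]
    rng = np.random.default_rng(0)
    rows = []
    for day in range(n_days):
        cov = np.cov(rng.normal(size=(len(tics), 20)))  # fake 3x3 covariance matrix
        for tic in tics:
            rows.append(
                {
                    "date": f"2024-01-{day + 1:02d}",
                    "tic": tic,
                    "close": 100.0 + rng.normal(),
                    "macd": rng.normal(),  # placeholder indicator values
                    "cov_list": cov,
                }
            )
    df = pd.DataFrame(rows)
    df.index = df["date"].factorize()[0]  # integer day index, as the env expects

    env = StockPortfolioEnv(
        df=df,
        stock_dim=len(tics),
        hmax=100,
        initial_amount=1_000_000,
        transaction_cost_pct=0.001,
        reward_scaling=1e-4,
        state_space=len(tics),
        action_space=len(tics),
        tech_indicator_list=["macd"],
    )

    # roll one episode with random actions; step() softmax-normalizes them
    state, _ = env.reset()
    done = False
    while not done:
        state, reward, done, truncated, info = env.step(env.action_space.sample())
    print(env.save_asset_memory())

    # for training with stable_baselines3, wrap the env first, e.g.:
    #     env_train, _ = env.get_sb_env()
    #     model = A2C("MlpPolicy", env_train).learn(total_timesteps=10_000)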