CoCalc -- env_nas100

GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/meta/env_stock_trading/env_nas100_wrds.py
⁷³² views
1
from __future__ import annotations
2

3
import os
4

5
import gym
6
import numpy as np
7
from numpy import random as rd
8

9
# Raise gym's logger threshold to level 40 so that the following warning is
# blocked: 'WARN: Box bound precision lowered by casting to float32'
gym.logger.set_level(40)
12

13

14
class StockEnvNAS100:
    """Array-backed stock trading environment for the NAS100 data set.

    The environment steps through rows of a price array, a technical
    indicator array and a turbulence array.  Each step converts a continuous
    action vector into integer share orders, applies transaction costs and
    returns the scaled change in total asset value as the reward.

    The interface follows the classic gym convention: ``reset()`` returns the
    initial state and ``step()`` returns ``(state, reward, done, info)``.
    """

    # Row boundaries used to carve the full data set into the three splits
    # selected by ``if_eval`` / ``if_trade`` in ``__init__``.
    _BEG_IDX = 0
    _MID_IDX = 211210
    _END_IDX = 422420
    _TRADE_END_IDX = 528026
    # Each stored turbulence value is repeated this many times in load_data
    # (presumably one value per day expanded to per-minute rows -- confirm).
    _ROWS_PER_TURBULENCE_VALUE = 390

    def __init__(
        self,
        cwd="./data/nas100",
        price_ary=None,
        tech_ary=None,
        turbulence_ary=None,
        gamma=0.999,
        turbulence_thresh=30,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        data_gap=4,
        reward_scaling=2**-11,
        ticker_list=None,
        tech_indicator_list=None,
        initial_stocks=None,
        if_eval=False,
        if_trade=False,
    ):
        """Build the environment from files in ``cwd`` or from given arrays.

        Args:
            cwd: Directory containing ``price_ary.npy``, ``tech_ary.npy`` and
                ``turb_ary.npy``.  Pass ``None`` to use the array arguments.
            price_ary: Price array, used only when ``cwd`` is ``None``.
            tech_ary: Technical indicator array, used only when ``cwd`` is
                ``None``.
            turbulence_ary: Turbulence array, used only when ``cwd`` is
                ``None``.
            gamma: Discount factor for the accumulated ``gamma_reward``.
            turbulence_thresh: Turbulence above this value forces a sell-off.
            min_stock_rate: Fraction of ``max_stock`` an order must exceed to
                be executed.
            max_stock: Multiplier turning an action into a share count.
            initial_capital: Nominal starting capital (randomised by +-5% on
                reset).
            buy_cost_pct: Proportional cost charged on purchases.
            sell_cost_pct: Proportional cost charged on sales.
            data_gap: Stride used when sub-sampling the rows.
            reward_scaling: Factor applied to the per-step asset change.
            ticker_list: Unused; kept for interface compatibility.
            tech_indicator_list: Unused; kept for interface compatibility.
            initial_stocks: Baseline holdings per stock; zeros when ``None``.
            if_eval: Select rows ``[0, 211210)`` instead of
                ``[211210, 422420)``.
            if_trade: Select rows ``[422420, 528026)``; overrides ``if_eval``.

        Raises:
            ValueError: If ``cwd`` is ``None`` and any array is missing.
            FileNotFoundError: If a required file is absent from ``cwd``.
        """
        self.min_stock_rate = min_stock_rate

        # Choose between the loaded triple and the argument triple as a
        # whole.  (Without the explicit branch, the conditional expression
        # would only apply to the first tuple element.)
        if cwd is not None:
            data_arrays = self.load_data(cwd)
        else:
            data_arrays = (price_ary, tech_ary, turbulence_ary)
        if any(ary is None for ary in data_arrays):
            raise ValueError(
                "price_ary, tech_ary and turbulence_ary must all be given "
                "when cwd is None"
            )

        if if_trade:
            i0, i1 = self._END_IDX, self._TRADE_END_IDX
        elif if_eval:
            # NOTE(review): evaluation uses the earlier rows and training the
            # later ones; this matches the original code -- confirm intent.
            i0, i1 = self._BEG_IDX, self._MID_IDX
        else:
            i0, i1 = self._MID_IDX, self._END_IDX
        self.price_ary, self.tech_ary, turbulence_ary = (
            ary[i0:i1:data_gap] for ary in data_arrays
        )

        self.tech_ary = self.tech_ary * 2**-7
        self.turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        self.turbulence_ary = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2**-5
        ).astype(np.float32)

        stock_dim = self.price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else initial_stocks
        )

        # Per-episode variables, populated by reset().
        self.day = None
        self.amount = None
        self.stocks = None
        self.stocks_cd = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None

        # Environment information.
        self.env_name = "StockEnvNAS"
        # amount + (turbulence, turbulence_bool)
        # + (price, stocks, stocks_cd) * stock_dim + tech_dim
        self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_ary.shape[1]
        self.action_dim = stock_dim
        self.max_step = self.price_ary.shape[0] - 1
        self.if_discrete = False
        self.target_return = 2.2
        self.episode_return = 0.0

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        """Start a new episode and return the initial state.

        Holdings are ``initial_stocks`` plus a random integer in ``[0, 64)``
        per stock; cash is ``initial_capital`` scaled by a random factor in
        ``[0.95, 1.05)`` minus the value of those holdings.

        Args:
            seed: When given, the random draws of this reset come from a
                private ``RandomState(seed)`` so the reset is reproducible
                without touching NumPy's global generator.  When ``None`` the
                global generator is used.
            options: Accepted for interface compatibility; unused.

        Returns:
            The state vector produced by ``get_state``.
        """
        rng = rd if seed is None else rd.RandomState(seed)

        self.day = 0
        price = self.price_ary[self.day]

        self.stocks = (
            self.initial_stocks + rng.randint(0, 64, size=self.initial_stocks.shape)
        ).astype(np.float32)
        self.stocks_cd = np.zeros_like(self.stocks)
        self.amount = (
            self.initial_capital * rng.uniform(0.95, 1.05)
            - (self.stocks * price).sum()
        )

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0
        return self.get_state(price)  # state

    def step(self, actions):
        """Advance one row, execute the orders and return the transition.

        Args:
            actions: Array with one entry per stock.  It is multiplied by
                ``max_stock`` and truncated to integers; negative entries are
                sell orders and positive entries are buy orders.  Orders whose
                magnitude does not exceed ``max_stock * min_stock_rate`` are
                ignored.

        Returns:
            ``(state, reward, done, info)``.  ``reward`` is the scaled change
            in total asset value, except on the final step where it is the
            discounted sum accumulated over the episode.  ``info`` is an
            empty dict.
        """
        actions = (actions * self.max_stock).astype(int)

        self.day += 1
        price = self.price_ary[self.day]
        self.stocks_cd += 1

        if self.turbulence_bool[self.day] == 0:
            min_action = int(self.max_stock * self.min_stock_rate)
            for index in np.where(actions < -min_action)[0]:  # sell orders
                if price[index] > 0:  # skip rows with a missing price
                    sell_num_shares = min(self.stocks[index], -actions[index])
                    self.stocks[index] -= sell_num_shares
                    self.amount += (
                        price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                    )
                    self.stocks_cd[index] = 0
            for index in np.where(actions > min_action)[0]:  # buy orders
                if price[index] > 0:  # skip rows with a missing price
                    # Size the order with the commission-inclusive price and
                    # never from negative cash; otherwise the purchase could
                    # overdraw the account, and an overdrawn account would
                    # yield a negative share count (an unintended short sale).
                    unit_cost = price[index] * (1 + self.buy_cost_pct)
                    affordable = max(self.amount, 0.0) // unit_cost
                    buy_num_shares = min(affordable, actions[index])
                    self.stocks[index] += buy_num_shares
                    self.amount -= unit_cost * buy_num_shares
                    self.stocks_cd[index] = 0

        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cd[:] = 0

        state = self.get_state(price)
        total_asset = self.amount + (self.stocks * price).sum()
        reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset

        self.gamma_reward = self.gamma_reward * self.gamma + reward
        done = self.day == self.max_step
        if done:
            reward = self.gamma_reward
            self.episode_return = total_asset / self.initial_total_asset

        return state, reward, done, {}

    def get_state(self, price):
        """Assemble the flat state vector for the current day.

        Layout: scaled cash (floored at 1e4 before scaling), turbulence,
        turbulence flag, scaled prices, scaled holdings, holding cool-down
        counters, technical indicators.

        Args:
            price: Price row for the current day.

        Returns:
            1-D array of length ``state_dim``.
        """
        amount = np.array(max(self.amount, 1e4) * (2**-12), dtype=np.float32)
        scale = np.array(2**-6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                self.turbulence_ary[self.day],
                self.turbulence_bool[self.day],
                price * scale,
                self.stocks * scale,
                self.stocks_cd,
                self.tech_ary[self.day],
            )
        )

    def load_data(self, cwd):
        """Load the price, technical indicator and turbulence arrays.

        Args:
            cwd: Directory containing ``price_ary.npy``, ``tech_ary.npy`` and
                ``turb_ary.npy``.

        Returns:
            ``(price_ary, tech_ary, turbulence_ary)``; the first two are cast
            to float32.

        Raises:
            FileNotFoundError: If any of the three files is missing.
        """
        data_path_price_array = f"{cwd}/price_ary.npy"
        data_path_tech_array = f"{cwd}/tech_ary.npy"
        data_path_turb_array = f"{cwd}/turb_ary.npy"

        missing = [
            path
            for path in (
                data_path_price_array,
                data_path_tech_array,
                data_path_turb_array,
            )
            if not os.path.exists(path)
        ]
        if missing:
            raise FileNotFoundError(f"missing data file(s): {', '.join(missing)}")

        # turbulence_ary.shape = (1358, ). std, min, max = 3, 0, 65.2
        turbulence_ary = np.load(data_path_turb_array)
        # Expand each value to 390 rows: 1358 * 390 = 529620
        turbulence_ary = turbulence_ary.repeat(self._ROWS_PER_TURBULENCE_VALUE)
        # Keep the last 528026 rows: 529620 - 1594 = 528026
        turbulence_ary = turbulence_ary[-self._TRADE_END_IDX :]

        price_ary = np.load(data_path_price_array).astype(np.float32)
        tech_ary = np.load(data_path_tech_array).astype(np.float32)

        return price_ary, tech_ary, turbulence_ary

    def draw_cumulative_return(self, args, _torch) -> list:
        """Roll out the saved agent for one episode and plot its returns.

        Args:
            args: Object providing ``agent``, ``net_dim`` and ``cwd``.  The
                agent is initialised and its model loaded from ``cwd``.
            _torch: The ``torch`` module, passed in by the caller.

        Returns:
            List with, for each step taken, the total asset value divided by
            the initial total asset value.  The plot is saved to
            ``{cwd}/cumulative_return.jpg``.
        """
        state_dim = self.state_dim
        action_dim = self.action_dim

        agent = args.agent
        net_dim = args.net_dim
        cwd = args.cwd

        agent.init(net_dim, state_dim, action_dim)
        agent.save_load_model(cwd=cwd, if_save=False)
        act = agent.act
        device = agent.device

        state = self.reset()
        episode_returns = []  # the cumulative_return / initial_account
        with _torch.no_grad():
            for _ in range(self.max_step):
                s_tensor = _torch.as_tensor((state,), device=device)
                a_tensor = act(s_tensor)  # action_tanh = act.forward()
                action = (
                    a_tensor.detach().cpu().numpy()[0]
                )  # not need detach(), because with torch.no_grad() outside
                state, reward, done, _info = self.step(action)

                total_asset = (
                    self.amount + (self.price_ary[self.day] * self.stocks).sum()
                )
                episode_return = total_asset / self.initial_total_asset
                episode_returns.append(episode_return)
                if done:
                    break

        import matplotlib.pyplot as plt

        plt.plot(episode_returns)
        plt.grid()
        plt.title("cumulative return")
        plt.xlabel("day")
        # The second label belongs on the y axis; calling xlabel twice would
        # overwrite "day" and leave the y axis unlabelled.
        plt.ylabel("multiple of initial_account")
        plt.savefig(f"{cwd}/cumulative_return.jpg")
        print(f"| draw_cumulative_return: save in {cwd}/cumulative_return.jpg")
        return episode_returns

    @staticmethod
    def sigmoid_sign(ary, thresh):
        """Squash ``ary`` with a zero-centred sigmoid scaled by ``thresh``.

        Computes ``(1 / (1 + exp(-e * ary / thresh)) - 0.5) * thresh``, so the
        output is 0 at ``ary == 0`` and lies strictly within
        ``(-thresh / 2, thresh / 2)``.
        """

        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh
243

244
Product

Resources

Company