GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/meta/env_stock_trading/env_stocktrading_stoploss.py
from __future__ import annotations

import random
import time
from copy import deepcopy

import gym
import matplotlib
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3.common import logger
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv

matplotlib.use("Agg")


class StockTradingEnvStopLoss(gym.Env):
    """
    A stock trading environment for OpenAI gym.

    This environment penalizes the model for exceeding the stop-loss threshold,
    for selling assets below the expected % profit, and for not maintaining a
    reserve of cash. This enables the model to trade with high confidence and to
    manage its cash reserves in addition to performing trading procedures.

    The reward at any step is given as follows:

        r_i = (sum(cash, asset_value) + additional_reward - total_penalty - initial_cash) / initial_cash / days_elapsed

    where
        total_penalty = cash_penalty + stop_loss_penalty + low_profit_penalty
        cash_penalty = max(0, sum(cash, asset_value) * cash_penalty_proportion - cash)
        stop_loss_penalty = -1 * dot(holdings, negative_closing_diff_avg_buy)
        low_profit_penalty = -1 * dot(holdings, negative_profit_sell_diff_avg_buy)
        additional_reward = dot(holdings, positive_profit_sell_diff_avg_buy)

    This reward function takes into account a profit/loss-ratio constraint and a
    liquidity requirement, as well as long-term accrued rewards. It also forces
    the model to trade only when it is confident enough to do so.

    Parameters:
        state space: {start_cash, <owned_shares>, for s in stocks{<stock.values>}}
        df (pandas.DataFrame): dataframe containing the market data
        buy_cost_pct (float): cost for buying shares
        sell_cost_pct (float): cost for selling shares
        hmax (int): max number of share purchases allowed per asset
        discrete_actions (bool): whether to discretize the action space
        shares_increment (int): multiple of shares that can be bought in each trade
        stoploss_penalty (float): maximum loss we can tolerate; valid values are
            between 0 and 1. If x is specified, the agent force-sells all holdings
            of an asset whenever current price < x * avg_buy_price
        profit_loss_ratio (int, float): expected profit/loss ratio; only applicable
            when stoploss_penalty < 1
        turbulence_threshold (float): maximum market turbulence allowed for purchases
            to occur; if exceeded, all positions are liquidated
        print_verbosity (int): how often (in steps) to print stats about the state of the env
        initial_amount (int, float): amount of cash initially available
        daily_information_cols (list(str)): columns used to build the state space from
            the dataframe; these can be OHLCV columns or any other variables such as
            technical indicators and the turbulence index
        cash_penalty_proportion (int, float): proportion of total assets to keep as a
            cash reserve; a penalty is applied when cash falls below this proportion
        patient (bool): whether to end the episode when we run out of cash, or simply
            stop buying until additional cash is available
        action space: <share_dollar_purchases>

    TODO:
        add holdings to memory
        move transactions to after the clip step.

    tests:
        after reset, a static strategy should result in the same metrics
        given no change in prices, no change in asset values
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        buy_cost_pct=3e-3,
        sell_cost_pct=3e-3,
        date_col_name="date",
        hmax=10,
        discrete_actions=False,
        shares_increment=1,
        stoploss_penalty=0.9,
        profit_loss_ratio=2,
        turbulence_threshold=None,
        print_verbosity=10,
        initial_amount=1e6,
        daily_information_cols=["open", "close", "high", "low", "volume"],
        cache_indicator_data=True,
        cash_penalty_proportion=0.1,
        random_start=True,
        patient=False,
        currency="$",
    ):
        self.df = df
        self.stock_col = "tic"
        self.assets = df[self.stock_col].unique()
        self.dates = df[date_col_name].sort_values().unique()
        self.random_start = random_start
        self.discrete_actions = discrete_actions
        self.patient = patient
        self.currency = currency
        self.df = self.df.set_index(date_col_name)
        self.shares_increment = shares_increment
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.print_verbosity = print_verbosity
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.stoploss_penalty = stoploss_penalty
        self.min_profit_penalty = 1 + profit_loss_ratio * (1 - self.stoploss_penalty)
        self.turbulence_threshold = turbulence_threshold
        self.daily_information_cols = daily_information_cols
        # state: cash, one holding per asset, then the daily information columns per asset
        self.state_space = (
            1 + len(self.assets) + len(self.assets) * len(self.daily_information_cols)
        )
        self.action_space = spaces.Box(low=-1, high=1, shape=(len(self.assets),))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.state_space,)
        )
        self.turbulence = 0
        self.episode = -1  # initialize so we can call reset
        self.episode_history = []
        self.printed_header = False
        self.cache_indicator_data = cache_indicator_data
        self.cached_data = None
        self.cash_penalty_proportion = cash_penalty_proportion
        if self.cache_indicator_data:
            print("caching data")
            self.cached_data = [
                self.get_date_vector(i) for i, _ in enumerate(self.dates)
            ]
            print("data cached!")

    def seed(self, seed=None):
        if seed is None:
            seed = int(round(time.time() * 1000))
        random.seed(seed)

    @property
    def current_step(self):
        return self.date_index - self.starting_point

    def reset(
        self,
        *,
        seed=None,
        options=None,
    ):
        self.seed()
        self.sum_trades = 0
        self.actual_num_trades = 0
        self.closing_diff_avg_buy = np.zeros(len(self.assets))
        self.profit_sell_diff_avg_buy = np.zeros(len(self.assets))
        self.n_buys = np.zeros(len(self.assets))
        self.avg_buy_price = np.zeros(len(self.assets))
        if self.random_start:
            starting_point = random.choice(range(int(len(self.dates) * 0.5)))
            self.starting_point = starting_point
        else:
            self.starting_point = 0
        self.date_index = self.starting_point
        self.turbulence = 0
        self.episode += 1
        self.actions_memory = []
        self.transaction_memory = []
        self.state_memory = []
        self.account_information = {
            "cash": [],
            "asset_value": [],
            "total_assets": [],
            "reward": [],
        }
        init_state = np.array(
            [self.initial_amount]
            + [0] * len(self.assets)
            + self.get_date_vector(self.date_index)
        )
        self.state_memory.append(init_state)
        return init_state

    def get_date_vector(self, date, cols=None):
        if (cols is None) and (self.cached_data is not None):
            return self.cached_data[date]
        else:
            date = self.dates[date]
            if cols is None:
                cols = self.daily_information_cols
            trunc_df = self.df.loc[[date]]
            v = []
            for a in self.assets:
                subset = trunc_df[trunc_df[self.stock_col] == a]
                v += subset.loc[date, cols].tolist()
            assert len(v) == len(self.assets) * len(cols)
            return v
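
    # Illustrative sketch of the observation layout (hypothetical two-asset
    # universe ["AAPL", "MSFT"] with the default daily_information_cols); the
    # state built in reset()/step() from get_date_vector() is ordered as:
    #   [cash,
    #    holdings_AAPL, holdings_MSFT,
    #    open_AAPL, close_AAPL, high_AAPL, low_AAPL, volume_AAPL,
    #    open_MSFT, close_MSFT, high_MSFT, low_MSFT, volume_MSFT]
    # i.e. 1 + len(assets) + len(assets) * len(daily_information_cols) entries,
    # matching self.state_space in __init__.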

    def return_terminal(self, reason="Last Date", reward=0):
        state = self.state_memory[-1]
        self.log_step(reason=reason, terminal_reward=reward)
        # Add outputs to logger interface
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/GainLoss_pct", (gl_pct - 1) * 100)
        logger.record(
            "environment/total_assets",
            int(self.account_information["total_assets"][-1]),
        )
        reward_pct = self.account_information["total_assets"][-1] / self.initial_amount
        logger.record("environment/total_reward_pct", (reward_pct - 1) * 100)
        logger.record("environment/total_trades", self.sum_trades)
        logger.record(
            "environment/actual_num_trades",
            self.actual_num_trades,
        )
        logger.record(
            "environment/avg_daily_trades",
            self.sum_trades / (self.current_step),
        )
        logger.record(
            "environment/avg_daily_trades_per_asset",
            self.sum_trades / (self.current_step) / len(self.assets),
        )
        logger.record("environment/completed_steps", self.current_step)
        logger.record(
            "environment/sum_rewards", np.sum(self.account_information["reward"])
        )
        logger.record(
            "environment/cash_proportion",
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1],
        )
        return state, reward, True, {}

    def log_step(self, reason, terminal_reward=None):
        if terminal_reward is None:
            terminal_reward = self.account_information["reward"][-1]
        cash_pct = (
            self.account_information["cash"][-1]
            / self.account_information["total_assets"][-1]
        )
        gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
        rec = [
            self.episode,
            self.date_index - self.starting_point,
            reason,
            f"{self.currency}{'{:0,.0f}'.format(float(self.account_information['cash'][-1]))}",
            f"{self.currency}{'{:0,.0f}'.format(float(self.account_information['total_assets'][-1]))}",
            f"{terminal_reward*100:0.5f}%",
            f"{(gl_pct - 1)*100:0.5f}%",
            f"{cash_pct*100:0.2f}%",
        ]
        self.episode_history.append(rec)
        print(self.template.format(*rec))

    def log_header(self):
        # column widths: 4, 4, 15, 15, 15, 10, 10, 10
        self.template = "{0:4}|{1:4}|{2:15}|{3:15}|{4:15}|{5:10}|{6:10}|{7:10}"
        print(
            self.template.format(
                "EPISODE",
                "STEPS",
                "TERMINAL_REASON",
                "CASH",
                "TOT_ASSETS",
                "TERMINAL_REWARD_unsc",
                "GAINLOSS_PCT",
                "CASH_PROPORTION",
            )
        )
        self.printed_header = True

    def get_reward(self):
        if self.current_step == 0:
            return 0
        else:
            total_assets = self.account_information["total_assets"][-1]
            cash = self.account_information["cash"][-1]
            holdings = self.state_memory[-1][1 : len(self.assets) + 1]
            neg_closing_diff_avg_buy = np.clip(self.closing_diff_avg_buy, -np.inf, 0)
            neg_profit_sell_diff_avg_buy = np.clip(
                self.profit_sell_diff_avg_buy, -np.inf, 0
            )
            pos_profit_sell_diff_avg_buy = np.clip(
                self.profit_sell_diff_avg_buy, 0, np.inf
            )

            cash_penalty = max(0, (total_assets * self.cash_penalty_proportion - cash))
            if self.current_step > 1:
                prev_holdings = self.state_memory[-2][1 : len(self.assets) + 1]
                stop_loss_penalty = -1 * np.dot(
                    np.array(prev_holdings), neg_closing_diff_avg_buy
                )
            else:
                stop_loss_penalty = 0
            low_profit_penalty = -1 * np.dot(
                np.array(holdings), neg_profit_sell_diff_avg_buy
            )
            total_penalty = cash_penalty + stop_loss_penalty + low_profit_penalty

            additional_reward = np.dot(np.array(holdings), pos_profit_sell_diff_avg_buy)

            reward = (
                (total_assets - total_penalty + additional_reward) / self.initial_amount
            ) - 1
            reward /= self.current_step

            return reward

    def step(self, actions):
        # let's just log what we're doing in terms of max actions at each step.
        self.sum_trades += np.sum(np.abs(actions))
        # print header only the first time
        if self.printed_header is False:
            self.log_header()
        # print if it's time.
        if (self.current_step + 1) % self.print_verbosity == 0:
            self.log_step(reason="update")
        # if we're at the end
        if self.date_index == len(self.dates) - 1:
            # if we hit the end, set reward to total gains (or losses)
            return self.return_terminal(reward=self.get_reward())
        else:
            # compute value of cash + assets
            begin_cash = self.state_memory[-1][0]
            holdings = self.state_memory[-1][1 : len(self.assets) + 1]
            assert min(holdings) >= 0
            closings = np.array(self.get_date_vector(self.date_index, cols=["close"]))
            asset_value = np.dot(holdings, closings)
            # reward is (cash + assets) - (cash_last_step + assets_last_step)
            reward = self.get_reward()
            # log the values of cash, assets, and total assets
            self.account_information["cash"].append(begin_cash)
            self.account_information["asset_value"].append(asset_value)
            self.account_information["total_assets"].append(begin_cash + asset_value)
            self.account_information["reward"].append(reward)

            # multiply action values by our scalar multiplier and save
            actions = actions * self.hmax
            self.actions_memory.append(
                actions * closings
            )  # capture what the model is trying to do
            # buy/sell only if the price is > 0 (no missing data on this particular date)
            actions = np.where(closings > 0, actions, 0)
            if self.turbulence_threshold is not None:
                # if turbulence goes over the threshold, just clear out all positions
                if self.turbulence >= self.turbulence_threshold:
                    actions = -(np.array(holdings) * closings)
                    self.log_step(reason="TURBULENCE")
            # scale cash purchases to asset
            if self.discrete_actions:
                # convert to integers because we can't buy fractions of shares
                actions = np.where(closings > 0, actions // closings, 0)
                actions = actions.astype(int)
                # round down actions to the nearest multiple of shares_increment
                actions = np.where(
                    actions >= 0,
                    (actions // self.shares_increment) * self.shares_increment,
                    ((actions + self.shares_increment) // self.shares_increment)
                    * self.shares_increment,
                )
            else:
                actions = np.where(closings > 0, actions / closings, 0)

            # clip actions so we can't sell more assets than we hold
            actions = np.maximum(actions, -np.array(holdings))

            self.closing_diff_avg_buy = closings - (
                self.stoploss_penalty * self.avg_buy_price
            )
            if begin_cash >= self.stoploss_penalty * self.initial_amount:
                # clear out position if the stop-loss criterion is met
                actions = np.where(
                    self.closing_diff_avg_buy < 0, -np.array(holdings), actions
                )

                if any(np.clip(self.closing_diff_avg_buy, -np.inf, 0) < 0):
                    self.log_step(reason="STOP LOSS")

            # compute our proceeds from sells, and add to cash
            sells = -np.clip(actions, -np.inf, 0)
            proceeds = np.dot(sells, closings)
            costs = proceeds * self.sell_cost_pct
            coh = begin_cash + proceeds
            # compute the cost of our buys
            buys = np.clip(actions, 0, np.inf)
            spend = np.dot(buys, closings)
            costs += spend * self.buy_cost_pct
            # if we run out of cash...
            if (spend + costs) > coh:
                if self.patient:
                    # ... just don't buy anything until additional cash comes in
                    self.log_step(reason="CASH SHORTAGE")
                    actions = np.where(actions > 0, 0, actions)
                    spend = 0
                    costs = 0
                else:
                    # ... end the cycle and penalize
                    return self.return_terminal(
                        reason="CASH SHORTAGE", reward=self.get_reward()
                    )

            self.transaction_memory.append(actions)  # capture what the model could do

            # get profitable sell actions
            sell_closing_price = np.where(
                sells > 0, closings, 0
            )  # get the closing price of assets that we sold
            profit_sell = np.where(
                sell_closing_price - self.avg_buy_price > 0, 1, 0
            )  # mark the ones which are profitable

            self.profit_sell_diff_avg_buy = np.where(
                profit_sell == 1,
                closings - (self.min_profit_penalty * self.avg_buy_price),
                0,
            )

            if any(np.clip(self.profit_sell_diff_avg_buy, -np.inf, 0) < 0):
                self.log_step(reason="LOW PROFIT")
            else:
                if any(np.clip(self.profit_sell_diff_avg_buy, 0, np.inf) > 0):
                    self.log_step(reason="HIGH PROFIT")

            # verify we didn't do anything impossible here
            assert (spend + costs) <= coh

            # log the actual total number of trades we did up to the current step
            self.actual_num_trades = np.sum(np.abs(np.sign(actions)))

            # update our holdings
            coh = coh - spend - costs
            holdings_updated = holdings + actions

            # update the average buy price (incremental average)
            buys = np.sign(buys)
            self.n_buys += buys
            self.avg_buy_price = np.where(
                buys > 0,
                self.avg_buy_price + ((closings - self.avg_buy_price) / self.n_buys),
                self.avg_buy_price,
            )

            # reset to zero when we don't have any holdings anymore
            self.n_buys = np.where(holdings_updated > 0, self.n_buys, 0)
            self.avg_buy_price = np.where(holdings_updated > 0, self.avg_buy_price, 0)

            self.date_index += 1
            if self.turbulence_threshold is not None:
                self.turbulence = self.get_date_vector(
                    self.date_index, cols=["turbulence"]
                )[0]

            # update state
            state = (
                [coh] + list(holdings_updated) + self.get_date_vector(self.date_index)
            )
            self.state_memory.append(state)

            return state, reward, False, {}

    def get_sb_env(self):
        def get_self():
            return deepcopy(self)

        e = DummyVecEnv([get_self])
        obs = e.reset()
        return e, obs

    def get_multiproc_env(self, n=10):
        def get_self():
            return deepcopy(self)

        e = SubprocVecEnv([get_self for _ in range(n)], start_method="fork")
        obs = e.reset()
        return e, obs

    def save_asset_memory(self):
        if self.current_step == 0:
            return None
        else:
            self.account_information["date"] = self.dates[
                -len(self.account_information["cash"]) :
            ]
            return pd.DataFrame(self.account_information)

    def save_action_memory(self):
        if self.current_step == 0:
            return None
        else:
            return pd.DataFrame(
                {
                    "date": self.dates[-len(self.account_information["cash"]) :],
                    "actions": self.actions_memory,
                    "transactions": self.transaction_memory,
                }
            )
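

if __name__ == "__main__":
    # Minimal usage sketch, assuming a FinRL-style dataframe saved as
    # "your_data.csv" (hypothetical path) with "date" and "tic" columns plus the
    # default daily information columns, and using stable_baselines3 PPO purely
    # as an example algorithm.
    from stable_baselines3 import PPO

    df = pd.read_csv("your_data.csv")
    env = StockTradingEnvStopLoss(
        df,
        initial_amount=1e6,
        discrete_actions=True,
        patient=True,
    )
    # wrap the environment for stable-baselines3 and train briefly
    vec_env, _ = env.get_sb_env()
    model = PPO("MlpPolicy", vec_env, verbose=1)
    model.learn(total_timesteps=10_000)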