GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/applications/stock_trading/fundamental_stock_trading.py
from __future__ import annotations


def main():
    import pandas as pd
    import numpy as np
    import matplotlib
    import matplotlib.pyplot as plt

    # matplotlib.use('Agg')
    import datetime

    from finrl import config
    from finrl import config_tickers
    from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
    from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
    from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
    from finrl.agents.stablebaselines3.models import DRLAgent
    from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
    from finrl.main import check_and_make_directories
    from pprint import pprint
    from stable_baselines3.common.logger import configure
    import sys

    sys.path.append("../FinRL")

    import itertools

    from finrl.config import (
        DATA_SAVE_DIR,
        TRAINED_MODEL_DIR,
        TENSORBOARD_LOG_DIR,
        RESULTS_DIR,
        INDICATORS,
        TRAIN_START_DATE,
        TRAIN_END_DATE,
        TEST_START_DATE,
        TEST_END_DATE,
        TRADE_START_DATE,
        TRADE_END_DATE,
    )

    from finrl.config_tickers import DOW_30_TICKER

    check_and_make_directories(
        [DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR]
    )

    print(DOW_30_TICKER)
    TRAIN_START_DATE = "2009-01-01"
    TRAIN_END_DATE = "2019-01-01"
    TEST_START_DATE = "2019-01-01"
    TEST_END_DATE = "2021-01-01"

    df = YahooDownloader(
        start_date=TRAIN_START_DATE, end_date=TEST_END_DATE, ticker_list=DOW_30_TICKER
    ).fetch_data()

    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")

    df.sort_values(["date", "tic"], ignore_index=True).head()

    # Import fundamental data from my GitHub repository
    url = "https://raw.githubusercontent.com/mariko-sawada/FinRL_with_fundamental_data/main/dow_30_fundamental_wrds.csv"

    fund = pd.read_csv(url)

    # List items that are used to calculate financial ratios

    items = [
        "datadate",  # Date
        "tic",  # Ticker
        "oiadpq",  # Quarterly operating income
        "revtq",  # Quarterly revenue
        "niq",  # Quarterly net income
        "atq",  # Total assets
        "teqq",  # Shareholders' equity
        "epspiy",  # EPS (basic), incl. extraordinary items
        "ceqq",  # Common equity
        "cshoq",  # Common shares outstanding
        "dvpspq",  # Dividends per share
        "actq",  # Current assets
        "lctq",  # Current liabilities
        "cheq",  # Cash & equivalents
        "rectq",  # Receivables
        "cogsq",  # Cost of goods sold
        "invtq",  # Inventories
        "apq",  # Accounts payable
        "dlttq",  # Long-term debt
        "dlcq",  # Debt in current liabilities
        "ltq",  # Liabilities
    ]

    # Omit items that will not be used
    fund_data = fund[items]

    # Rename columns for readability
    fund_data = fund_data.rename(
        columns={
            "datadate": "date",  # Date
            "oiadpq": "op_inc_q",  # Quarterly operating income
            "revtq": "rev_q",  # Quarterly revenue
            "niq": "net_inc_q",  # Quarterly net income
            "atq": "tot_assets",  # Assets
            "teqq": "sh_equity",  # Shareholders' equity
            "epspiy": "eps_incl_ex",  # EPS (basic), incl. extraordinary items
            "ceqq": "com_eq",  # Common equity
            "cshoq": "sh_outstanding",  # Common shares outstanding
            "dvpspq": "div_per_sh",  # Dividends per share
            "actq": "cur_assets",  # Current assets
            "lctq": "cur_liabilities",  # Current liabilities
            "cheq": "cash_eq",  # Cash & equivalents
            "rectq": "receivables",  # Receivables
            "cogsq": "cogs_q",  # Cost of goods sold
            "invtq": "inventories",  # Inventories
            "apq": "payables",  # Accounts payable
            "dlttq": "long_debt",  # Long-term debt
            "dlcq": "short_debt",  # Debt in current liabilities
            "ltq": "tot_liabilities",  # Liabilities
        }
    )

    # Calculate financial ratios
    date = pd.to_datetime(fund_data["date"], format="%Y%m%d")

    tic = fund_data["tic"].to_frame("tic")

    # Profitability ratios
    # Operating margin
    OPM = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="OPM")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            OPM[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            OPM.iloc[i] = np.nan
        else:
            OPM.iloc[i] = np.sum(fund_data["op_inc_q"].iloc[i - 3 : i]) / np.sum(
                fund_data["rev_q"].iloc[i - 3 : i]
            )

    # Net profit margin
    NPM = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="NPM")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            NPM[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            NPM.iloc[i] = np.nan
        else:
            NPM.iloc[i] = np.sum(fund_data["net_inc_q"].iloc[i - 3 : i]) / np.sum(
                fund_data["rev_q"].iloc[i - 3 : i]
            )

    # Return on assets
    ROA = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="ROA")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            ROA[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            ROA.iloc[i] = np.nan
        else:
            ROA.iloc[i] = (
                np.sum(fund_data["net_inc_q"].iloc[i - 3 : i])
                / fund_data["tot_assets"].iloc[i]
            )

    # Return on equity
    ROE = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="ROE")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            ROE[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            ROE.iloc[i] = np.nan
        else:
            ROE.iloc[i] = (
                np.sum(fund_data["net_inc_q"].iloc[i - 3 : i])
                / fund_data["sh_equity"].iloc[i]
            )

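    # Note on the loops above: each ratio is computed from a trailing window of prior
    # quarters for the same ticker (`iloc[i - 3 : i]` covers the three rows before
    # row i as written), so the first rows of each ticker remain NaN and are
    # zero-filled further below. This comment only describes the behaviour as written;
    # whether the window should also include the current quarter is left unchanged.
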
    # For the valuation ratios in the next subpart, calculate per-share items in advance
    # Earnings per share
    EPS = fund_data["eps_incl_ex"].to_frame("EPS")

    # Book value per share
    BPS = (fund_data["com_eq"] / fund_data["sh_outstanding"]).to_frame(
        "BPS"
    )  # Need to check units

    # Dividend per share
    DPS = fund_data["div_per_sh"].to_frame("DPS")

    # Liquidity ratios
    # Current ratio
    cur_ratio = (fund_data["cur_assets"] / fund_data["cur_liabilities"]).to_frame(
        "cur_ratio"
    )

    # Quick ratio
    quick_ratio = (
        (fund_data["cash_eq"] + fund_data["receivables"]) / fund_data["cur_liabilities"]
    ).to_frame("quick_ratio")

    # Cash ratio
    cash_ratio = (fund_data["cash_eq"] / fund_data["cur_liabilities"]).to_frame(
        "cash_ratio"
    )

    # Efficiency ratios
    # Inventory turnover ratio
    inv_turnover = pd.Series(
        np.empty(fund_data.shape[0], dtype=object), name="inv_turnover"
    )
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            inv_turnover[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            inv_turnover.iloc[i] = np.nan
        else:
            inv_turnover.iloc[i] = (
                np.sum(fund_data["cogs_q"].iloc[i - 3 : i])
                / fund_data["inventories"].iloc[i]
            )

    # Receivables turnover ratio
    acc_rec_turnover = pd.Series(
        np.empty(fund_data.shape[0], dtype=object), name="acc_rec_turnover"
    )
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            acc_rec_turnover[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            acc_rec_turnover.iloc[i] = np.nan
        else:
            acc_rec_turnover.iloc[i] = (
                np.sum(fund_data["rev_q"].iloc[i - 3 : i])
                / fund_data["receivables"].iloc[i]
            )

    # Payables turnover ratio
    acc_pay_turnover = pd.Series(
        np.empty(fund_data.shape[0], dtype=object), name="acc_pay_turnover"
    )
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            acc_pay_turnover[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            acc_pay_turnover.iloc[i] = np.nan
        else:
            acc_pay_turnover.iloc[i] = (
                np.sum(fund_data["cogs_q"].iloc[i - 3 : i])
                / fund_data["payables"].iloc[i]
            )

    ## Leverage financial ratios
    # Debt ratio
    debt_ratio = (fund_data["tot_liabilities"] / fund_data["tot_assets"]).to_frame(
        "debt_ratio"
    )

    # Debt-to-equity ratio
    debt_to_equity = (fund_data["tot_liabilities"] / fund_data["sh_equity"]).to_frame(
        "debt_to_equity"
    )

    # Create a dataframe that merges all the ratios
    ratios = pd.concat(
        [
            date,
            tic,
            OPM,
            NPM,
            ROA,
            ROE,
            EPS,
            BPS,
            DPS,
            cur_ratio,
            quick_ratio,
            cash_ratio,
            inv_turnover,
            acc_rec_turnover,
            acc_pay_turnover,
            debt_ratio,
            debt_to_equity,
        ],
        axis=1,
    )

    # Replace NAs and infinite values with zero
    final_ratios = ratios.copy()
    final_ratios = final_ratios.fillna(0)
    final_ratios = final_ratios.replace(np.inf, 0)

    list_ticker = df["tic"].unique().tolist()
    list_date = list(pd.date_range(df["date"].min(), df["date"].max()))
    combination = list(itertools.product(list_date, list_ticker))

    # Merge stock price data and ratios into one dataframe
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        df, on=["date", "tic"], how="left"
    )
    processed_full = processed_full.merge(final_ratios, how="left", on=["date", "tic"])
    processed_full = processed_full.sort_values(["tic", "date"])

    # Backfill the ratio data to make it daily
    processed_full = processed_full.bfill(axis="rows")

    # Calculate P/E, P/B and dividend yield using the daily closing price
    processed_full["PE"] = processed_full["close"] / processed_full["EPS"]
    processed_full["PB"] = processed_full["close"] / processed_full["BPS"]
    processed_full["Div_yield"] = processed_full["DPS"] / processed_full["close"]

    # Drop the per-share items used for the calculations above
    processed_full = processed_full.drop(columns=["day", "EPS", "BPS", "DPS"])
    # Replace NAs and infinite values with zero
    processed_full = processed_full.copy()
    processed_full = processed_full.fillna(0)
    processed_full = processed_full.replace(np.inf, 0)

    # Check the final data
    processed_full.sort_values(["date", "tic"], ignore_index=True).head(10)

    train_data = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
    trade_data = data_split(processed_full, TEST_START_DATE, TEST_END_DATE)
    # Check the length of the two datasets
    print(len(train_data))
    print(len(trade_data))

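    # Note: the class defined below is a local copy of FinRL's stock trading
    # environment. Defining it here shadows the StockTradingEnv imported from
    # finrl.meta.env_stock_trading above, so the version used by this script is the
    # one that follows, with the fundamental ratios passed in as the
    # tech_indicator_list features.
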
    import gym
    import matplotlib
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from gym import spaces
    from gym.utils import seeding
    from stable_baselines3.common.vec_env import DummyVecEnv

    matplotlib.use("Agg")

    # from stable_baselines3.common import logger

    class StockTradingEnv(gym.Env):
        """A stock trading environment for OpenAI gym"""

        metadata = {"render.modes": ["human"]}

        def __init__(
            self,
            df,
            stock_dim,
            hmax,
            initial_amount,
            buy_cost_pct,
            sell_cost_pct,
            reward_scaling,
            state_space,
            action_space,
            tech_indicator_list,
            turbulence_threshold=None,
            risk_indicator_col="turbulence",
            make_plots=False,
            print_verbosity=10,
            day=0,
            initial=True,
            previous_state=[],
            model_name="",
            mode="",
            iteration="",
        ):
            self.day = day
            self.df = df
            self.stock_dim = stock_dim
            self.hmax = hmax
            self.initial_amount = initial_amount
            self.buy_cost_pct = buy_cost_pct
            self.sell_cost_pct = sell_cost_pct
            self.reward_scaling = reward_scaling
            self.state_space = state_space
            self.action_space = action_space
            self.tech_indicator_list = tech_indicator_list
            self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_space,))
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=(self.state_space,)
            )
            self.data = self.df.loc[self.day, :]
            self.terminal = False
            self.make_plots = make_plots
            self.print_verbosity = print_verbosity
            self.turbulence_threshold = turbulence_threshold
            self.risk_indicator_col = risk_indicator_col
            self.initial = initial
            self.previous_state = previous_state
            self.model_name = model_name
            self.mode = mode
            self.iteration = iteration
            # initialize state
            self.state = self._initiate_state()

            # initialize reward
            self.reward = 0
            self.turbulence = 0
            self.cost = 0
            self.trades = 0
            self.episode = 0
            # memorize the total balance change
            self.asset_memory = [self.initial_amount]
            self.rewards_memory = []
            self.actions_memory = []
            self.date_memory = [self._get_date()]
            # self.reset()
            self._seed()

        def _sell_stock(self, index, action):
            def _do_sell_normal():
                if self.state[index + 1] > 0:
                    # Sell only if the price is > 0 (no missing data in this particular date)
                    # perform sell action based on the sign of the action
                    if self.state[index + self.stock_dim + 1] > 0:
                        # Sell only if current asset is > 0
                        sell_num_shares = min(
                            abs(action), self.state[index + self.stock_dim + 1]
                        )
                        sell_amount = (
                            self.state[index + 1]
                            * sell_num_shares
                            * (1 - self.sell_cost_pct)
                        )
                        # update balance
                        self.state[0] += sell_amount

                        self.state[index + self.stock_dim + 1] -= sell_num_shares
                        self.cost += (
                            self.state[index + 1] * sell_num_shares * self.sell_cost_pct
                        )
                        self.trades += 1
                    else:
                        sell_num_shares = 0
                else:
                    sell_num_shares = 0

                return sell_num_shares

            # perform sell action based on the sign of the action
            if self.turbulence_threshold is not None:
                if self.turbulence >= self.turbulence_threshold:
                    if self.state[index + 1] > 0:
                        # Sell only if the price is > 0 (no missing data in this particular date)
                        # if turbulence goes over threshold, just clear out all positions
                        if self.state[index + self.stock_dim + 1] > 0:
                            # Sell only if current asset is > 0
                            sell_num_shares = self.state[index + self.stock_dim + 1]
                            sell_amount = (
                                self.state[index + 1]
                                * sell_num_shares
                                * (1 - self.sell_cost_pct)
                            )
                            # update balance
                            self.state[0] += sell_amount
                            self.state[index + self.stock_dim + 1] = 0
                            self.cost += (
                                self.state[index + 1]
                                * sell_num_shares
                                * self.sell_cost_pct
                            )
                            self.trades += 1
                        else:
                            sell_num_shares = 0
                    else:
                        sell_num_shares = 0
                else:
                    sell_num_shares = _do_sell_normal()
            else:
                sell_num_shares = _do_sell_normal()

            return sell_num_shares

        def _buy_stock(self, index, action):
            def _do_buy():
                if self.state[index + 1] > 0:
                    # Buy only if the price is > 0 (no missing data in this particular date)
                    available_amount = self.state[0] // self.state[index + 1]
                    # print('available_amount:{}'.format(available_amount))

                    # update balance
                    buy_num_shares = min(available_amount, action)
                    buy_amount = (
                        self.state[index + 1] * buy_num_shares * (1 + self.buy_cost_pct)
                    )
                    self.state[0] -= buy_amount

                    self.state[index + self.stock_dim + 1] += buy_num_shares

                    self.cost += (
                        self.state[index + 1] * buy_num_shares * self.buy_cost_pct
                    )
                    self.trades += 1
                else:
                    buy_num_shares = 0

                return buy_num_shares

            # perform buy action based on the sign of the action
            if self.turbulence_threshold is None:
                buy_num_shares = _do_buy()
            else:
                if self.turbulence < self.turbulence_threshold:
                    buy_num_shares = _do_buy()
                else:
                    buy_num_shares = 0
                    pass

            return buy_num_shares

        def _make_plot(self):
            plt.plot(self.asset_memory, "r")
            plt.savefig(f"results/account_value_trade_{self.episode}.png")
            plt.close()

        def step(self, actions):
            self.terminal = self.day >= len(self.df.index.unique()) - 1
            if self.terminal:
                # print(f"Episode: {self.episode}")
                if self.make_plots:
                    self._make_plot()
                end_total_asset = self.state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                    )
                )
                df_total_value = pd.DataFrame(self.asset_memory)
                tot_reward = (
                    self.state[0]
                    + sum(
                        np.array(self.state[1 : (self.stock_dim + 1)])
                        * np.array(
                            self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                        )
                    )
                    - self.initial_amount
                )
                df_total_value.columns = ["account_value"]
                df_total_value["date"] = self.date_memory
                df_total_value["daily_return"] = df_total_value[
                    "account_value"
                ].pct_change(1)
                if df_total_value["daily_return"].std() != 0:
                    sharpe = (
                        (252**0.5)
                        * df_total_value["daily_return"].mean()
                        / df_total_value["daily_return"].std()
                    )
                df_rewards = pd.DataFrame(self.rewards_memory)
                df_rewards.columns = ["account_rewards"]
                df_rewards["date"] = self.date_memory[:-1]
                if self.episode % self.print_verbosity == 0:
                    print(f"day: {self.day}, episode: {self.episode}")
                    print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
                    print(f"end_total_asset: {end_total_asset:0.2f}")
                    print(f"total_reward: {tot_reward:0.2f}")
                    print(f"total_cost: {self.cost:0.2f}")
                    print(f"total_trades: {self.trades}")
                    if df_total_value["daily_return"].std() != 0:
                        print(f"Sharpe: {sharpe:0.3f}")
                    print("=================================")

                if (self.model_name != "") and (self.mode != ""):
                    df_actions = self.save_action_memory()
                    df_actions.to_csv(
                        "results/actions_{}_{}_{}.csv".format(
                            self.mode, self.model_name, self.iteration
                        )
                    )
                    df_total_value.to_csv(
                        "results/account_value_{}_{}_{}.csv".format(
                            self.mode, self.model_name, self.iteration
                        ),
                        index=False,
                    )
                    df_rewards.to_csv(
                        "results/account_rewards_{}_{}_{}.csv".format(
                            self.mode, self.model_name, self.iteration
                        ),
                        index=False,
                    )
                    plt.plot(self.asset_memory, "r")
                    plt.savefig(
                        "results/account_value_{}_{}_{}.png".format(
                            self.mode, self.model_name, self.iteration
                        )
                    )
                    plt.close()

                # Add outputs to logger interface
                # logger.record("environment/portfolio_value", end_total_asset)
                # logger.record("environment/total_reward", tot_reward)
                # logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100)
                # logger.record("environment/total_cost", self.cost)
                # logger.record("environment/total_trades", self.trades)

                return self.state, self.reward, self.terminal, {}

            else:
                actions = (
                    actions * self.hmax
                )  # actions arrive scaled to [-1, 1]; multiply by hmax to get share amounts
                actions = actions.astype(
                    int
                )  # convert to integers because we can't trade fractions of shares
                if self.turbulence_threshold is not None:
                    if self.turbulence >= self.turbulence_threshold:
                        actions = np.array([-self.hmax] * self.stock_dim)
                begin_total_asset = self.state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                    )
                )
                # print("begin_total_asset:{}".format(begin_total_asset))

                argsort_actions = np.argsort(actions)

                sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]]
                buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]]

                for index in sell_index:
                    # print(f"Num shares before: {self.state[index+self.stock_dim+1]}")
                    # print(f'take sell action before : {actions[index]}')
                    actions[index] = self._sell_stock(index, actions[index]) * (-1)
                    # print(f'take sell action after : {actions[index]}')
                    # print(f"Num shares after: {self.state[index+self.stock_dim+1]}")

                for index in buy_index:
                    # print('take buy action: {}'.format(actions[index]))
                    actions[index] = self._buy_stock(index, actions[index])

                self.actions_memory.append(actions)

                # state: s -> s+1
                self.day += 1
                self.data = self.df.loc[self.day, :]
                if self.turbulence_threshold is not None:
                    if len(self.df.tic.unique()) == 1:
                        self.turbulence = self.data[self.risk_indicator_col]
                    elif len(self.df.tic.unique()) > 1:
                        self.turbulence = self.data[self.risk_indicator_col].values[0]
                self.state = self._update_state()

                end_total_asset = self.state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                    )
                )
                self.asset_memory.append(end_total_asset)
                self.date_memory.append(self._get_date())
                self.reward = end_total_asset - begin_total_asset
                self.rewards_memory.append(self.reward)
                self.reward = self.reward * self.reward_scaling

            return self.state, self.reward, self.terminal, {}

        def reset(self):
            # initiate state
            self.state = self._initiate_state()

            if self.initial:
                self.asset_memory = [self.initial_amount]
            else:
                previous_total_asset = self.previous_state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.previous_state[
                            (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                        ]
                    )
                )
                self.asset_memory = [previous_total_asset]

            self.day = 0
            self.data = self.df.loc[self.day, :]
            self.turbulence = 0
            self.cost = 0
            self.trades = 0
            self.terminal = False
            # self.iteration=self.iteration
            self.rewards_memory = []
            self.actions_memory = []
            self.date_memory = [self._get_date()]

            self.episode += 1

            return self.state

        def render(self, mode="human", close=False):
            return self.state

        def _initiate_state(self):
            if self.initial:
                # For Initial State
                if len(self.df.tic.unique()) > 1:
                    # for multiple stocks
                    state = (
                        [self.initial_amount]
                        + self.data.close.values.tolist()
                        + [0] * self.stock_dim
                        + sum(
                            [
                                self.data[tech].values.tolist()
                                for tech in self.tech_indicator_list
                            ],
                            [],
                        )
                    )
                else:
                    # for a single stock
                    state = (
                        [self.initial_amount]
                        + [self.data.close]
                        + [0] * self.stock_dim
                        + sum(
                            [[self.data[tech]] for tech in self.tech_indicator_list], []
                        )
                    )
            else:
                # Using Previous State
                if len(self.df.tic.unique()) > 1:
                    # for multiple stocks
                    state = (
                        [self.previous_state[0]]
                        + self.data.close.values.tolist()
                        + self.previous_state[
                            (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                        ]
                        + sum(
                            [
                                self.data[tech].values.tolist()
                                for tech in self.tech_indicator_list
                            ],
                            [],
                        )
                    )
                else:
                    # for a single stock
                    state = (
                        [self.previous_state[0]]
                        + [self.data.close]
                        + self.previous_state[
                            (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                        ]
                        + sum(
                            [[self.data[tech]] for tech in self.tech_indicator_list], []
                        )
                    )
            return state

        def _update_state(self):
            if len(self.df.tic.unique()) > 1:
                # for multiple stocks
                state = (
                    [self.state[0]]
                    + self.data.close.values.tolist()
                    + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
                    + sum(
                        [
                            self.data[tech].values.tolist()
                            for tech in self.tech_indicator_list
                        ],
                        [],
                    )
                )

            else:
                # for a single stock
                state = (
                    [self.state[0]]
                    + [self.data.close]
                    + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
                    + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
                )
            return state

        def _get_date(self):
            if len(self.df.tic.unique()) > 1:
                date = self.data.date.unique()[0]
            else:
                date = self.data.date
            return date

        def save_asset_memory(self):
            date_list = self.date_memory
            asset_list = self.asset_memory
            # print(len(date_list))
            # print(len(asset_list))
            df_account_value = pd.DataFrame(
                {"date": date_list, "account_value": asset_list}
            )
            return df_account_value

        def save_action_memory(self):
            if len(self.df.tic.unique()) > 1:
                # date and close price length must match actions length
                date_list = self.date_memory[:-1]
                df_date = pd.DataFrame(date_list)
                df_date.columns = ["date"]

                action_list = self.actions_memory
                df_actions = pd.DataFrame(action_list)
                df_actions.columns = self.data.tic.values
                df_actions.index = df_date.date
                # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
            else:
                date_list = self.date_memory[:-1]
                action_list = self.actions_memory
                df_actions = pd.DataFrame({"date": date_list, "actions": action_list})
            return df_actions

        def _seed(self, seed=None):
            self.np_random, seed = seeding.np_random(seed)
            return [seed]

        def get_sb_env(self):
            e = DummyVecEnv([lambda: self])
            obs = e.reset()
            return e, obs

    ratio_list = [
        "OPM",
        "NPM",
        "ROA",
        "ROE",
        "cur_ratio",
        "quick_ratio",
        "cash_ratio",
        "inv_turnover",
        "acc_rec_turnover",
        "acc_pay_turnover",
        "debt_ratio",
        "debt_to_equity",
        "PE",
        "PB",
        "Div_yield",
    ]

    stock_dimension = len(train_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(ratio_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

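    # State layout used by the environment above: one cash balance, then
    # stock_dimension closing prices, stock_dimension share holdings, and one entry
    # per financial ratio per stock, which gives
    # 1 + 2 * stock_dimension + len(ratio_list) * stock_dimension.
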
    # Parameters for the environment
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": ratio_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }

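    # For orientation (not prescriptive values): hmax caps the number of shares traded
    # per stock per step, buy_cost_pct/sell_cost_pct model a 0.1% transaction fee, and
    # reward_scaling shrinks the raw change in portfolio value before it is passed to
    # the RL algorithm as the reward.
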
    # Establish the training environment using the StockTradingEnv() class
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    print(type(env_train))

    # Set up the agent with the DRLAgent() class, using the environment created above
    agent = DRLAgent(env=env_train)

    if_using_a2c = False
    if_using_ddpg = False
    if_using_ppo = False
    if_using_td3 = False
    if_using_sac = True

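    # Only SAC is enabled by default. Models whose flag is False are still constructed
    # below but are skipped during training and evaluation, so their trained_* variables
    # remain None.
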
    agent = DRLAgent(env=env_train)
    PPO_PARAMS = {
        "n_steps": 2048,
        "ent_coef": 0.01,
        "learning_rate": 0.00025,
        "batch_size": 128,
    }
    model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

    if if_using_ppo:
        # set up logger
        tmp_path = RESULTS_DIR + "/ppo"
        new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_ppo.set_logger(new_logger_ppo)

    trained_ppo = (
        agent.train_model(model=model_ppo, tb_log_name="ppo", total_timesteps=50000)
        if if_using_ppo
        else None
    )

    agent = DRLAgent(env=env_train)
    model_ddpg = agent.get_model("ddpg")

    if if_using_ddpg:
        # set up logger
        tmp_path = RESULTS_DIR + "/ddpg"
        new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_ddpg.set_logger(new_logger_ddpg)

    trained_ddpg = (
        agent.train_model(model=model_ddpg, tb_log_name="ddpg", total_timesteps=50000)
        if if_using_ddpg
        else None
    )

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c")

    if if_using_a2c:
        # set up logger
        tmp_path = RESULTS_DIR + "/a2c"
        new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_a2c.set_logger(new_logger_a2c)

    trained_a2c = (
        agent.train_model(model=model_a2c, tb_log_name="a2c", total_timesteps=50000)
        if if_using_a2c
        else None
    )

    agent = DRLAgent(env=env_train)
    TD3_PARAMS = {"batch_size": 100, "buffer_size": 1000000, "learning_rate": 0.001}

    model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)

    if if_using_td3:
        # set up logger
        tmp_path = RESULTS_DIR + "/td3"
        new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_td3.set_logger(new_logger_td3)

    trained_td3 = (
        agent.train_model(model=model_td3, tb_log_name="td3", total_timesteps=30000)
        if if_using_td3
        else None
    )

    agent = DRLAgent(env=env_train)
    SAC_PARAMS = {
        "batch_size": 128,
        "buffer_size": 1000000,
        "learning_rate": 0.0001,
        "learning_starts": 100,
        "ent_coef": "auto_0.1",
    }

    model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

    if if_using_sac:
        # set up logger
        tmp_path = RESULTS_DIR + "/sac"
        new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_sac.set_logger(new_logger_sac)

    trained_sac = (
        agent.train_model(model=model_sac, tb_log_name="sac", total_timesteps=30000)
        if if_using_sac
        else None
    )

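    # Out-of-sample evaluation: DRL_prediction replays each trained policy on the
    # trading environment (the 2019-2021 test window here) and returns the daily
    # account values and the actions taken, which feed the backtest statistics below.
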
    trade_data = data_split(processed_full, TEST_START_DATE, TEST_END_DATE)
    e_trade_gym = StockTradingEnv(df=trade_data, **env_kwargs)
    # env_trade, obs_trade = e_trade_gym.get_sb_env()

    df_account_value_ppo, df_actions_ppo = (
        DRLAgent.DRL_prediction(model=trained_ppo, environment=e_trade_gym)
        if if_using_ppo
        else [None, None]
    )

    df_account_value_ddpg, df_actions_ddpg = (
        DRLAgent.DRL_prediction(model=trained_ddpg, environment=e_trade_gym)
        if if_using_ddpg
        else [None, None]
    )

    df_account_value_a2c, df_actions_a2c = (
        DRLAgent.DRL_prediction(model=trained_a2c, environment=e_trade_gym)
        if if_using_a2c
        else [None, None]
    )

    df_account_value_td3, df_actions_td3 = (
        DRLAgent.DRL_prediction(model=trained_td3, environment=e_trade_gym)
        if if_using_td3
        else [None, None]
    )

    df_account_value_sac, df_actions_sac = (
        DRLAgent.DRL_prediction(model=trained_sac, environment=e_trade_gym)
        if if_using_sac
        else [None, None]
    )

    print("==============Get Backtest Results===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    if if_using_ppo:
        print("\n ppo:")
        perf_stats_all_ppo = backtest_stats(account_value=df_account_value_ppo)
        perf_stats_all_ppo = pd.DataFrame(perf_stats_all_ppo)
        perf_stats_all_ppo.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_ppo_" + now + ".csv"
        )

    if if_using_ddpg:
        print("\n ddpg:")
        perf_stats_all_ddpg = backtest_stats(account_value=df_account_value_ddpg)
        perf_stats_all_ddpg = pd.DataFrame(perf_stats_all_ddpg)
        perf_stats_all_ddpg.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_ddpg_" + now + ".csv"
        )

    if if_using_a2c:
        print("\n a2c:")
        perf_stats_all_a2c = backtest_stats(account_value=df_account_value_a2c)
        perf_stats_all_a2c = pd.DataFrame(perf_stats_all_a2c)
        perf_stats_all_a2c.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_a2c_" + now + ".csv"
        )

    if if_using_td3:
        print("\n td3:")
        perf_stats_all_td3 = backtest_stats(account_value=df_account_value_td3)
        perf_stats_all_td3 = pd.DataFrame(perf_stats_all_td3)
        perf_stats_all_td3.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_td3_" + now + ".csv"
        )

    if if_using_sac:
        print("\n sac:")
        perf_stats_all_sac = backtest_stats(account_value=df_account_value_sac)
        perf_stats_all_sac = pd.DataFrame(perf_stats_all_sac)
        perf_stats_all_sac.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_sac_" + now + ".csv"
        )

    # baseline stats
    print("==============Get Baseline Stats===========")
    baseline_df = get_baseline(ticker="^DJI", start=TEST_START_DATE, end=TEST_END_DATE)

    stats = backtest_stats(baseline_df, value_col_name="close")

    print("==============Compare to DJIA===========")

    # S&P 500: ^GSPC
    # Dow Jones Index: ^DJI
    # NASDAQ 100: ^NDX

    if if_using_ppo:
        backtest_plot(
            df_account_value_ppo,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_ddpg:
        backtest_plot(
            df_account_value_ddpg,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_a2c:
        backtest_plot(
            df_account_value_a2c,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_td3:
        backtest_plot(
            df_account_value_td3,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_sac:
        backtest_plot(
            df_account_value_sac,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )


if __name__ == "__main__":
    main()