GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/applications/stock_trading/fundamental_stock_trading.py
from __future__ import annotations


def main():
    import pandas as pd
    import numpy as np
    import matplotlib
    import matplotlib.pyplot as plt

    # matplotlib.use('Agg')
    import datetime

    from finrl import config
    from finrl import config_tickers
    from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
    from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
    from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
    from finrl.agents.stablebaselines3.models import DRLAgent
    from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
    from finrl.main import check_and_make_directories
    from pprint import pprint
    from stable_baselines3.common.logger import configure
    import sys

    sys.path.append("../FinRL")

    import itertools

    from finrl.config import (
        DATA_SAVE_DIR,
        TRAINED_MODEL_DIR,
        TENSORBOARD_LOG_DIR,
        RESULTS_DIR,
        INDICATORS,
        TRAIN_START_DATE,
        TRAIN_END_DATE,
        TEST_START_DATE,
        TEST_END_DATE,
        TRADE_START_DATE,
        TRADE_END_DATE,
    )

    from finrl.config_tickers import DOW_30_TICKER

    check_and_make_directories(
        [DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR]
    )

    print(DOW_30_TICKER)
    TRAIN_START_DATE = "2009-01-01"
    TRAIN_END_DATE = "2019-01-01"
    TEST_START_DATE = "2019-01-01"
    TEST_END_DATE = "2021-01-01"

    df = YahooDownloader(
        start_date=TRAIN_START_DATE, end_date=TEST_END_DATE, ticker_list=DOW_30_TICKER
    ).fetch_data()

    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")

    df.sort_values(["date", "tic"], ignore_index=True).head()

    # Import fundamental data from my GitHub repository
    url = "https://raw.githubusercontent.com/mariko-sawada/FinRL_with_fundamental_data/main/dow_30_fundamental_wrds.csv"

    fund = pd.read_csv(url)

    # List items that are used to calculate financial ratios

    items = [
        "datadate",  # Date
        "tic",  # Ticker
        "oiadpq",  # Quarterly operating income
        "revtq",  # Quarterly revenue
        "niq",  # Quarterly net income
        "atq",  # Total assets
        "teqq",  # Shareholders' equity
        "epspiy",  # EPS (basic), incl. extraordinary items
        "ceqq",  # Common equity
        "cshoq",  # Common shares outstanding
        "dvpspq",  # Dividends per share
        "actq",  # Current assets
        "lctq",  # Current liabilities
        "cheq",  # Cash & equivalents
        "rectq",  # Receivables
        "cogsq",  # Cost of goods sold
        "invtq",  # Inventories
        "apq",  # Accounts payable
        "dlttq",  # Long-term debt
        "dlcq",  # Debt in current liabilities
        "ltq",  # Liabilities
    ]

    # Omit items that will not be used
    fund_data = fund[items]

    # Rename columns for readability
    fund_data = fund_data.rename(
        columns={
            "datadate": "date",  # Date
            "oiadpq": "op_inc_q",  # Quarterly operating income
            "revtq": "rev_q",  # Quarterly revenue
            "niq": "net_inc_q",  # Quarterly net income
            "atq": "tot_assets",  # Assets
            "teqq": "sh_equity",  # Shareholders' equity
            "epspiy": "eps_incl_ex",  # EPS (basic), incl. extraordinary items
            "ceqq": "com_eq",  # Common equity
            "cshoq": "sh_outstanding",  # Common shares outstanding
            "dvpspq": "div_per_sh",  # Dividends per share
            "actq": "cur_assets",  # Current assets
            "lctq": "cur_liabilities",  # Current liabilities
            "cheq": "cash_eq",  # Cash & equivalents
            "rectq": "receivables",  # Receivables
            "cogsq": "cogs_q",  # Cost of goods sold
            "invtq": "inventories",  # Inventories
            "apq": "payables",  # Accounts payable
            "dlttq": "long_debt",  # Long-term debt
            "dlcq": "short_debt",  # Debt in current liabilities
            "ltq": "tot_liabilities",  # Liabilities
        }
    )

    # Calculate financial ratios
    date = pd.to_datetime(fund_data["date"], format="%Y%m%d")

    tic = fund_data["tic"].to_frame("tic")

    # Profitability ratios
    # Operating margin
    OPM = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="OPM")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            OPM[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            OPM.iloc[i] = np.nan
        else:
            OPM.iloc[i] = np.sum(fund_data["op_inc_q"].iloc[i - 3 : i]) / np.sum(
                fund_data["rev_q"].iloc[i - 3 : i]
            )

    # Net profit margin
    NPM = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="NPM")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            NPM[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            NPM.iloc[i] = np.nan
        else:
            NPM.iloc[i] = np.sum(fund_data["net_inc_q"].iloc[i - 3 : i]) / np.sum(
                fund_data["rev_q"].iloc[i - 3 : i]
            )

    # Return on assets
    ROA = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="ROA")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            ROA[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            ROA.iloc[i] = np.nan
        else:
            ROA.iloc[i] = (
                np.sum(fund_data["net_inc_q"].iloc[i - 3 : i])
                / fund_data["tot_assets"].iloc[i]
            )

    # Return on equity
    ROE = pd.Series(np.empty(fund_data.shape[0], dtype=object), name="ROE")
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            ROE[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            ROE.iloc[i] = np.nan
        else:
            ROE.iloc[i] = (
                np.sum(fund_data["net_inc_q"].iloc[i - 3 : i])
                / fund_data["sh_equity"].iloc[i]
            )

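    # Note on the loops above: each ratio is computed from a trailing window of prior
    # quarters for the same ticker (`iloc[i - 3 : i]` covers the three rows before
    # row i as written), so the first rows of each ticker remain NaN and are
    # zero-filled further below. This comment only describes the behaviour as written;
    # whether the window should also include the current quarter is left unchanged.
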
    # For the valuation ratios in the next subpart, calculate per-share items in advance
    # Earnings per share
    EPS = fund_data["eps_incl_ex"].to_frame("EPS")

    # Book value per share
    BPS = (fund_data["com_eq"] / fund_data["sh_outstanding"]).to_frame(
        "BPS"
    )  # Need to check units

    # Dividend per share
    DPS = fund_data["div_per_sh"].to_frame("DPS")

    # Liquidity ratios
    # Current ratio
    cur_ratio = (fund_data["cur_assets"] / fund_data["cur_liabilities"]).to_frame(
        "cur_ratio"
    )

    # Quick ratio
    quick_ratio = (
        (fund_data["cash_eq"] + fund_data["receivables"]) / fund_data["cur_liabilities"]
    ).to_frame("quick_ratio")

    # Cash ratio
    cash_ratio = (fund_data["cash_eq"] / fund_data["cur_liabilities"]).to_frame(
        "cash_ratio"
    )

    # Efficiency ratios
    # Inventory turnover ratio
    inv_turnover = pd.Series(
        np.empty(fund_data.shape[0], dtype=object), name="inv_turnover"
    )
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            inv_turnover[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            inv_turnover.iloc[i] = np.nan
        else:
            inv_turnover.iloc[i] = (
                np.sum(fund_data["cogs_q"].iloc[i - 3 : i])
                / fund_data["inventories"].iloc[i]
            )

    # Receivables turnover ratio
    acc_rec_turnover = pd.Series(
        np.empty(fund_data.shape[0], dtype=object), name="acc_rec_turnover"
    )
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            acc_rec_turnover[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            acc_rec_turnover.iloc[i] = np.nan
        else:
            acc_rec_turnover.iloc[i] = (
                np.sum(fund_data["rev_q"].iloc[i - 3 : i])
                / fund_data["receivables"].iloc[i]
            )

    # Payables turnover ratio
    acc_pay_turnover = pd.Series(
        np.empty(fund_data.shape[0], dtype=object), name="acc_pay_turnover"
    )
    for i in range(0, fund_data.shape[0]):
        if i - 3 < 0:
            acc_pay_turnover[i] = np.nan
        elif fund_data.iloc[i, 1] != fund_data.iloc[i - 3, 1]:
            acc_pay_turnover.iloc[i] = np.nan
        else:
            acc_pay_turnover.iloc[i] = (
                np.sum(fund_data["cogs_q"].iloc[i - 3 : i])
                / fund_data["payables"].iloc[i]
            )

    ## Leverage financial ratios
    # Debt ratio
    debt_ratio = (fund_data["tot_liabilities"] / fund_data["tot_assets"]).to_frame(
        "debt_ratio"
    )

    # Debt-to-equity ratio
    debt_to_equity = (fund_data["tot_liabilities"] / fund_data["sh_equity"]).to_frame(
        "debt_to_equity"
    )

    # Create a dataframe that merges all the ratios
    ratios = pd.concat(
        [
            date,
            tic,
            OPM,
            NPM,
            ROA,
            ROE,
            EPS,
            BPS,
            DPS,
            cur_ratio,
            quick_ratio,
            cash_ratio,
            inv_turnover,
            acc_rec_turnover,
            acc_pay_turnover,
            debt_ratio,
            debt_to_equity,
        ],
        axis=1,
    )

    # Replace NAs and infinite values with zero
    final_ratios = ratios.copy()
    final_ratios = final_ratios.fillna(0)
    final_ratios = final_ratios.replace(np.inf, 0)

    list_ticker = df["tic"].unique().tolist()
    list_date = list(pd.date_range(df["date"].min(), df["date"].max()))
    combination = list(itertools.product(list_date, list_ticker))

    # Merge stock price data and ratios into one dataframe
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        df, on=["date", "tic"], how="left"
    )
    processed_full = processed_full.merge(final_ratios, how="left", on=["date", "tic"])
    processed_full = processed_full.sort_values(["tic", "date"])

    # Backfill the ratio data to make it daily
    processed_full = processed_full.bfill(axis="rows")

    # Calculate P/E, P/B and dividend yield using the daily closing price
    processed_full["PE"] = processed_full["close"] / processed_full["EPS"]
    processed_full["PB"] = processed_full["close"] / processed_full["BPS"]
    processed_full["Div_yield"] = processed_full["DPS"] / processed_full["close"]

    # Drop the per-share items used for the calculations above
    processed_full = processed_full.drop(columns=["day", "EPS", "BPS", "DPS"])
    # Replace NAs and infinite values with zero
    processed_full = processed_full.copy()
    processed_full = processed_full.fillna(0)
    processed_full = processed_full.replace(np.inf, 0)

    # Check the final data
    processed_full.sort_values(["date", "tic"], ignore_index=True).head(10)

    train_data = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
    trade_data = data_split(processed_full, TEST_START_DATE, TEST_END_DATE)
    # Check the length of the two datasets
    print(len(train_data))
    print(len(trade_data))

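    # Note: the class defined below is a local copy of FinRL's stock trading
    # environment. Defining it here shadows the StockTradingEnv imported from
    # finrl.meta.env_stock_trading above, so the version used by this script is the
    # one that follows, with the fundamental ratios passed in as the
    # tech_indicator_list features.
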
    import gym
    import matplotlib
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from gym import spaces
    from gym.utils import seeding
    from stable_baselines3.common.vec_env import DummyVecEnv

    matplotlib.use("Agg")

    # from stable_baselines3.common import logger

    class StockTradingEnv(gym.Env):
        """A stock trading environment for OpenAI gym"""

        metadata = {"render.modes": ["human"]}

        def __init__(
            self,
            df,
            stock_dim,
            hmax,
            initial_amount,
            buy_cost_pct,
            sell_cost_pct,
            reward_scaling,
            state_space,
            action_space,
            tech_indicator_list,
            turbulence_threshold=None,
            risk_indicator_col="turbulence",
            make_plots=False,
            print_verbosity=10,
            day=0,
            initial=True,
            previous_state=[],
            model_name="",
            mode="",
            iteration="",
        ):
            self.day = day
            self.df = df
            self.stock_dim = stock_dim
            self.hmax = hmax
            self.initial_amount = initial_amount
            self.buy_cost_pct = buy_cost_pct
            self.sell_cost_pct = sell_cost_pct
            self.reward_scaling = reward_scaling
            self.state_space = state_space
            self.action_space = action_space
            self.tech_indicator_list = tech_indicator_list
            self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_space,))
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=(self.state_space,)
            )
            self.data = self.df.loc[self.day, :]
            self.terminal = False
            self.make_plots = make_plots
            self.print_verbosity = print_verbosity
            self.turbulence_threshold = turbulence_threshold
            self.risk_indicator_col = risk_indicator_col
            self.initial = initial
            self.previous_state = previous_state
            self.model_name = model_name
            self.mode = mode
            self.iteration = iteration
            # initialize state
            self.state = self._initiate_state()

            # initialize reward
            self.reward = 0
            self.turbulence = 0
            self.cost = 0
            self.trades = 0
            self.episode = 0
            # memorize the total balance change
            self.asset_memory = [self.initial_amount]
            self.rewards_memory = []
            self.actions_memory = []
            self.date_memory = [self._get_date()]
            # self.reset()
            self._seed()

        def _sell_stock(self, index, action):
            def _do_sell_normal():
                if self.state[index + 1] > 0:
                    # Sell only if the price is > 0 (no missing data in this particular date)
                    # perform sell action based on the sign of the action
                    if self.state[index + self.stock_dim + 1] > 0:
                        # Sell only if current asset is > 0
                        sell_num_shares = min(
                            abs(action), self.state[index + self.stock_dim + 1]
                        )
                        sell_amount = (
                            self.state[index + 1]
                            * sell_num_shares
                            * (1 - self.sell_cost_pct)
                        )
                        # update balance
                        self.state[0] += sell_amount

                        self.state[index + self.stock_dim + 1] -= sell_num_shares
                        self.cost += (
                            self.state[index + 1] * sell_num_shares * self.sell_cost_pct
                        )
                        self.trades += 1
                    else:
                        sell_num_shares = 0
                else:
                    sell_num_shares = 0

                return sell_num_shares

            # perform sell action based on the sign of the action
            if self.turbulence_threshold is not None:
                if self.turbulence >= self.turbulence_threshold:
                    if self.state[index + 1] > 0:
                        # Sell only if the price is > 0 (no missing data in this particular date)
                        # if turbulence goes over threshold, just clear out all positions
                        if self.state[index + self.stock_dim + 1] > 0:
                            # Sell only if current asset is > 0
                            sell_num_shares = self.state[index + self.stock_dim + 1]
                            sell_amount = (
                                self.state[index + 1]
                                * sell_num_shares
                                * (1 - self.sell_cost_pct)
                            )
                            # update balance
                            self.state[0] += sell_amount
                            self.state[index + self.stock_dim + 1] = 0
                            self.cost += (
                                self.state[index + 1]
                                * sell_num_shares
                                * self.sell_cost_pct
                            )
                            self.trades += 1
                        else:
                            sell_num_shares = 0
                    else:
                        sell_num_shares = 0
                else:
                    sell_num_shares = _do_sell_normal()
            else:
                sell_num_shares = _do_sell_normal()

            return sell_num_shares

        def _buy_stock(self, index, action):
            def _do_buy():
                if self.state[index + 1] > 0:
                    # Buy only if the price is > 0 (no missing data in this particular date)
                    available_amount = self.state[0] // self.state[index + 1]
                    # print('available_amount:{}'.format(available_amount))

                    # update balance
                    buy_num_shares = min(available_amount, action)
                    buy_amount = (
                        self.state[index + 1] * buy_num_shares * (1 + self.buy_cost_pct)
                    )
                    self.state[0] -= buy_amount

                    self.state[index + self.stock_dim + 1] += buy_num_shares

                    self.cost += (
                        self.state[index + 1] * buy_num_shares * self.buy_cost_pct
                    )
                    self.trades += 1
                else:
                    buy_num_shares = 0

                return buy_num_shares

            # perform buy action based on the sign of the action
            if self.turbulence_threshold is None:
                buy_num_shares = _do_buy()
            else:
                if self.turbulence < self.turbulence_threshold:
                    buy_num_shares = _do_buy()
                else:
                    buy_num_shares = 0
                    pass

            return buy_num_shares

        def _make_plot(self):
            plt.plot(self.asset_memory, "r")
            plt.savefig(f"results/account_value_trade_{self.episode}.png")
            plt.close()

        def step(self, actions):
            self.terminal = self.day >= len(self.df.index.unique()) - 1
            if self.terminal:
                # print(f"Episode: {self.episode}")
                if self.make_plots:
                    self._make_plot()
                end_total_asset = self.state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                    )
                )
                df_total_value = pd.DataFrame(self.asset_memory)
                tot_reward = (
                    self.state[0]
                    + sum(
                        np.array(self.state[1 : (self.stock_dim + 1)])
                        * np.array(
                            self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                        )
                    )
                    - self.initial_amount
                )
                df_total_value.columns = ["account_value"]
                df_total_value["date"] = self.date_memory
                df_total_value["daily_return"] = df_total_value[
                    "account_value"
                ].pct_change(1)
                if df_total_value["daily_return"].std() != 0:
                    sharpe = (
                        (252**0.5)
                        * df_total_value["daily_return"].mean()
                        / df_total_value["daily_return"].std()
                    )
                df_rewards = pd.DataFrame(self.rewards_memory)
                df_rewards.columns = ["account_rewards"]
                df_rewards["date"] = self.date_memory[:-1]
                if self.episode % self.print_verbosity == 0:
                    print(f"day: {self.day}, episode: {self.episode}")
                    print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
                    print(f"end_total_asset: {end_total_asset:0.2f}")
                    print(f"total_reward: {tot_reward:0.2f}")
                    print(f"total_cost: {self.cost:0.2f}")
                    print(f"total_trades: {self.trades}")
                    if df_total_value["daily_return"].std() != 0:
                        print(f"Sharpe: {sharpe:0.3f}")
                    print("=================================")

                if (self.model_name != "") and (self.mode != ""):
                    df_actions = self.save_action_memory()
                    df_actions.to_csv(
                        "results/actions_{}_{}_{}.csv".format(
                            self.mode, self.model_name, self.iteration
                        )
                    )
                    df_total_value.to_csv(
                        "results/account_value_{}_{}_{}.csv".format(
                            self.mode, self.model_name, self.iteration
                        ),
                        index=False,
                    )
                    df_rewards.to_csv(
                        "results/account_rewards_{}_{}_{}.csv".format(
                            self.mode, self.model_name, self.iteration
                        ),
                        index=False,
                    )
                    plt.plot(self.asset_memory, "r")
                    plt.savefig(
                        "results/account_value_{}_{}_{}.png".format(
                            self.mode, self.model_name, self.iteration
                        )
                    )
                    plt.close()

                # Add outputs to logger interface
                # logger.record("environment/portfolio_value", end_total_asset)
                # logger.record("environment/total_reward", tot_reward)
                # logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100)
                # logger.record("environment/total_cost", self.cost)
                # logger.record("environment/total_trades", self.trades)

                return self.state, self.reward, self.terminal, {}

            else:
                actions = (
                    actions * self.hmax
                )  # actions arrive scaled to [-1, 1]; multiply by hmax to get share amounts
                actions = actions.astype(
                    int
                )  # convert to integers because we can't trade fractions of shares
                if self.turbulence_threshold is not None:
                    if self.turbulence >= self.turbulence_threshold:
                        actions = np.array([-self.hmax] * self.stock_dim)
                begin_total_asset = self.state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                    )
                )
                # print("begin_total_asset:{}".format(begin_total_asset))

                argsort_actions = np.argsort(actions)

                sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]]
                buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]]

                for index in sell_index:
                    # print(f"Num shares before: {self.state[index+self.stock_dim+1]}")
                    # print(f'take sell action before : {actions[index]}')
                    actions[index] = self._sell_stock(index, actions[index]) * (-1)
                    # print(f'take sell action after : {actions[index]}')
                    # print(f"Num shares after: {self.state[index+self.stock_dim+1]}")

                for index in buy_index:
                    # print('take buy action: {}'.format(actions[index]))
                    actions[index] = self._buy_stock(index, actions[index])

                self.actions_memory.append(actions)

                # state: s -> s+1
                self.day += 1
                self.data = self.df.loc[self.day, :]
                if self.turbulence_threshold is not None:
                    if len(self.df.tic.unique()) == 1:
                        self.turbulence = self.data[self.risk_indicator_col]
                    elif len(self.df.tic.unique()) > 1:
                        self.turbulence = self.data[self.risk_indicator_col].values[0]
                self.state = self._update_state()

                end_total_asset = self.state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
                    )
                )
                self.asset_memory.append(end_total_asset)
                self.date_memory.append(self._get_date())
                self.reward = end_total_asset - begin_total_asset
                self.rewards_memory.append(self.reward)
                self.reward = self.reward * self.reward_scaling

            return self.state, self.reward, self.terminal, {}

        def reset(self):
            # initiate state
            self.state = self._initiate_state()

            if self.initial:
                self.asset_memory = [self.initial_amount]
            else:
                previous_total_asset = self.previous_state[0] + sum(
                    np.array(self.state[1 : (self.stock_dim + 1)])
                    * np.array(
                        self.previous_state[
                            (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                        ]
                    )
                )
                self.asset_memory = [previous_total_asset]

            self.day = 0
            self.data = self.df.loc[self.day, :]
            self.turbulence = 0
            self.cost = 0
            self.trades = 0
            self.terminal = False
            # self.iteration=self.iteration
            self.rewards_memory = []
            self.actions_memory = []
            self.date_memory = [self._get_date()]

            self.episode += 1

            return self.state

        def render(self, mode="human", close=False):
            return self.state

        def _initiate_state(self):
            if self.initial:
                # For Initial State
                if len(self.df.tic.unique()) > 1:
                    # for multiple stocks
                    state = (
                        [self.initial_amount]
                        + self.data.close.values.tolist()
                        + [0] * self.stock_dim
                        + sum(
                            [
                                self.data[tech].values.tolist()
                                for tech in self.tech_indicator_list
                            ],
                            [],
                        )
                    )
                else:
                    # for a single stock
                    state = (
                        [self.initial_amount]
                        + [self.data.close]
                        + [0] * self.stock_dim
                        + sum(
                            [[self.data[tech]] for tech in self.tech_indicator_list], []
                        )
                    )
            else:
                # Using Previous State
                if len(self.df.tic.unique()) > 1:
                    # for multiple stocks
                    state = (
                        [self.previous_state[0]]
                        + self.data.close.values.tolist()
                        + self.previous_state[
                            (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                        ]
                        + sum(
                            [
                                self.data[tech].values.tolist()
                                for tech in self.tech_indicator_list
                            ],
                            [],
                        )
                    )
                else:
                    # for a single stock
                    state = (
                        [self.previous_state[0]]
                        + [self.data.close]
                        + self.previous_state[
                            (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
                        ]
                        + sum(
                            [[self.data[tech]] for tech in self.tech_indicator_list], []
                        )
                    )
            return state

        def _update_state(self):
            if len(self.df.tic.unique()) > 1:
                # for multiple stocks
                state = (
                    [self.state[0]]
                    + self.data.close.values.tolist()
                    + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
                    + sum(
                        [
                            self.data[tech].values.tolist()
                            for tech in self.tech_indicator_list
                        ],
                        [],
                    )
                )

            else:
                # for a single stock
                state = (
                    [self.state[0]]
                    + [self.data.close]
                    + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
                    + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
                )
            return state

        def _get_date(self):
            if len(self.df.tic.unique()) > 1:
                date = self.data.date.unique()[0]
            else:
                date = self.data.date
            return date

        def save_asset_memory(self):
            date_list = self.date_memory
            asset_list = self.asset_memory
            # print(len(date_list))
            # print(len(asset_list))
            df_account_value = pd.DataFrame(
                {"date": date_list, "account_value": asset_list}
            )
            return df_account_value

        def save_action_memory(self):
            if len(self.df.tic.unique()) > 1:
                # date and close price length must match actions length
                date_list = self.date_memory[:-1]
                df_date = pd.DataFrame(date_list)
                df_date.columns = ["date"]

                action_list = self.actions_memory
                df_actions = pd.DataFrame(action_list)
                df_actions.columns = self.data.tic.values
                df_actions.index = df_date.date
                # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
            else:
                date_list = self.date_memory[:-1]
                action_list = self.actions_memory
                df_actions = pd.DataFrame({"date": date_list, "actions": action_list})
            return df_actions

        def _seed(self, seed=None):
            self.np_random, seed = seeding.np_random(seed)
            return [seed]

        def get_sb_env(self):
            e = DummyVecEnv([lambda: self])
            obs = e.reset()
            return e, obs

    ratio_list = [
        "OPM",
        "NPM",
        "ROA",
        "ROE",
        "cur_ratio",
        "quick_ratio",
        "cash_ratio",
        "inv_turnover",
        "acc_rec_turnover",
        "acc_pay_turnover",
        "debt_ratio",
        "debt_to_equity",
        "PE",
        "PB",
        "Div_yield",
    ]

    stock_dimension = len(train_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(ratio_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

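    # State layout used by the environment above: one cash balance, then
    # stock_dimension closing prices, stock_dimension share holdings, and one entry
    # per financial ratio per stock, which gives
    # 1 + 2 * stock_dimension + len(ratio_list) * stock_dimension.
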
    # Parameters for the environment
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": ratio_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }

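    # For orientation (not prescriptive values): hmax caps the number of shares traded
    # per stock per step, buy_cost_pct/sell_cost_pct model a 0.1% transaction fee, and
    # reward_scaling shrinks the raw change in portfolio value before it is passed to
    # the RL algorithm as the reward.
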
    # Establish the training environment using the StockTradingEnv() class
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    print(type(env_train))

    # Set up the agent with the DRLAgent() class, using the environment created above
    agent = DRLAgent(env=env_train)

    if_using_a2c = False
    if_using_ddpg = False
    if_using_ppo = False
    if_using_td3 = False
    if_using_sac = True

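    # Only SAC is enabled by default. Models whose flag is False are still constructed
    # below but are skipped during training and evaluation, so their trained_* variables
    # remain None.
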
    agent = DRLAgent(env=env_train)
    PPO_PARAMS = {
        "n_steps": 2048,
        "ent_coef": 0.01,
        "learning_rate": 0.00025,
        "batch_size": 128,
    }
    model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

    if if_using_ppo:
        # set up logger
        tmp_path = RESULTS_DIR + "/ppo"
        new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_ppo.set_logger(new_logger_ppo)

    trained_ppo = (
        agent.train_model(model=model_ppo, tb_log_name="ppo", total_timesteps=50000)
        if if_using_ppo
        else None
    )

    agent = DRLAgent(env=env_train)
    model_ddpg = agent.get_model("ddpg")

    if if_using_ddpg:
        # set up logger
        tmp_path = RESULTS_DIR + "/ddpg"
        new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_ddpg.set_logger(new_logger_ddpg)

    trained_ddpg = (
        agent.train_model(model=model_ddpg, tb_log_name="ddpg", total_timesteps=50000)
        if if_using_ddpg
        else None
    )

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c")

    if if_using_a2c:
        # set up logger
        tmp_path = RESULTS_DIR + "/a2c"
        new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_a2c.set_logger(new_logger_a2c)

    trained_a2c = (
        agent.train_model(model=model_a2c, tb_log_name="a2c", total_timesteps=50000)
        if if_using_a2c
        else None
    )

    agent = DRLAgent(env=env_train)
    TD3_PARAMS = {"batch_size": 100, "buffer_size": 1000000, "learning_rate": 0.001}

    model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)

    if if_using_td3:
        # set up logger
        tmp_path = RESULTS_DIR + "/td3"
        new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_td3.set_logger(new_logger_td3)

    trained_td3 = (
        agent.train_model(model=model_td3, tb_log_name="td3", total_timesteps=30000)
        if if_using_td3
        else None
    )

    agent = DRLAgent(env=env_train)
    SAC_PARAMS = {
        "batch_size": 128,
        "buffer_size": 1000000,
        "learning_rate": 0.0001,
        "learning_starts": 100,
        "ent_coef": "auto_0.1",
    }

    model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

    if if_using_sac:
        # set up logger
        tmp_path = RESULTS_DIR + "/sac"
        new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_sac.set_logger(new_logger_sac)

    trained_sac = (
        agent.train_model(model=model_sac, tb_log_name="sac", total_timesteps=30000)
        if if_using_sac
        else None
    )

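    # Out-of-sample evaluation: DRL_prediction replays each trained policy on the
    # trading environment (the 2019-2021 test window here) and returns the daily
    # account values and the actions taken, which feed the backtest statistics below.
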
    trade_data = data_split(processed_full, TEST_START_DATE, TEST_END_DATE)
    e_trade_gym = StockTradingEnv(df=trade_data, **env_kwargs)
    # env_trade, obs_trade = e_trade_gym.get_sb_env()

    df_account_value_ppo, df_actions_ppo = (
        DRLAgent.DRL_prediction(model=trained_ppo, environment=e_trade_gym)
        if if_using_ppo
        else [None, None]
    )

    df_account_value_ddpg, df_actions_ddpg = (
        DRLAgent.DRL_prediction(model=trained_ddpg, environment=e_trade_gym)
        if if_using_ddpg
        else [None, None]
    )

    df_account_value_a2c, df_actions_a2c = (
        DRLAgent.DRL_prediction(model=trained_a2c, environment=e_trade_gym)
        if if_using_a2c
        else [None, None]
    )

    df_account_value_td3, df_actions_td3 = (
        DRLAgent.DRL_prediction(model=trained_td3, environment=e_trade_gym)
        if if_using_td3
        else [None, None]
    )

    df_account_value_sac, df_actions_sac = (
        DRLAgent.DRL_prediction(model=trained_sac, environment=e_trade_gym)
        if if_using_sac
        else [None, None]
    )

    print("==============Get Backtest Results===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    if if_using_ppo:
        print("\n ppo:")
        perf_stats_all_ppo = backtest_stats(account_value=df_account_value_ppo)
        perf_stats_all_ppo = pd.DataFrame(perf_stats_all_ppo)
        perf_stats_all_ppo.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_ppo_" + now + ".csv"
        )

    if if_using_ddpg:
        print("\n ddpg:")
        perf_stats_all_ddpg = backtest_stats(account_value=df_account_value_ddpg)
        perf_stats_all_ddpg = pd.DataFrame(perf_stats_all_ddpg)
        perf_stats_all_ddpg.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_ddpg_" + now + ".csv"
        )

    if if_using_a2c:
        print("\n a2c:")
        perf_stats_all_a2c = backtest_stats(account_value=df_account_value_a2c)
        perf_stats_all_a2c = pd.DataFrame(perf_stats_all_a2c)
        perf_stats_all_a2c.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_a2c_" + now + ".csv"
        )

    if if_using_td3:
        print("\n td3:")
        perf_stats_all_td3 = backtest_stats(account_value=df_account_value_td3)
        perf_stats_all_td3 = pd.DataFrame(perf_stats_all_td3)
        perf_stats_all_td3.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_td3_" + now + ".csv"
        )

    if if_using_sac:
        print("\n sac:")
        perf_stats_all_sac = backtest_stats(account_value=df_account_value_sac)
        perf_stats_all_sac = pd.DataFrame(perf_stats_all_sac)
        perf_stats_all_sac.to_csv(
            "./" + config.RESULTS_DIR + "/perf_stats_all_sac_" + now + ".csv"
        )

    # baseline stats
    print("==============Get Baseline Stats===========")
    baseline_df = get_baseline(ticker="^DJI", start=TEST_START_DATE, end=TEST_END_DATE)

    stats = backtest_stats(baseline_df, value_col_name="close")

    print("==============Compare to DJIA===========")

    # S&P 500: ^GSPC
    # Dow Jones Index: ^DJI
    # NASDAQ 100: ^NDX

    if if_using_ppo:
        backtest_plot(
            df_account_value_ppo,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_ddpg:
        backtest_plot(
            df_account_value_ddpg,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_a2c:
        backtest_plot(
            df_account_value_a2c,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_td3:
        backtest_plot(
            df_account_value_td3,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )

    if if_using_sac:
        backtest_plot(
            df_account_value_sac,
            baseline_ticker="^DJI",
            baseline_start=TEST_START_DATE,
            baseline_end=TEST_END_DATE,
        )


if __name__ == "__main__":
    main()