Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
AI4Finance-Foundation
GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/applications/imitation_learning/Weight_Initialization.ipynb
732 views
Kernel: finrl

Installation Setup

%load_ext autoreload %autoreload 2
import numpy as np import pandas as pd import dask.dataframe as dd from scipy import stats import statsmodels.api as sm from statsmodels.regression.rolling import RollingOLS import matplotlib.pyplot as plt from matplotlib import ticker import seaborn as sns; sns.set() from pandas.tseries.offsets import * from dateutil.relativedelta import * import datetime as dt import os from linearmodels.asset_pricing import TradedFactorModel, LinearFactorModel from IPython.core.pylabtools import figsize from IPython.core.interactiveshell import InteractiveShell from fredapi import Fred fred = Fred(api_key = 'b0363f9c9d853b92b27e06c4727bc2ea') import pandas_datareader.data as web %matplotlib inline %pylab inline pylab.rcParams['figure.figsize'] = (20,10)
%pylab is deprecated, use %matplotlib inline and import the required libraries. Populating the interactive namespace from numpy and matplotlib
C:\Users\kentw\AppData\Local\Continuum\anaconda3\envs\finrl\lib\site-packages\IPython\core\magics\pylab.py:162: UserWarning: pylab import has clobbered these variables: ['WE', 'MO', 'SU', 'FR', 'SA', 'TH', 'TU'] `%matplotlib` prevents importing * from pylab and numpy warn("pylab import has clobbered these variables: %s" % clobbered +
import pickle from multiprocessing import Pool import random import json import sys import StockPortfolioEnv import pytz import itertools from datetime import datetime as dt from finrl.meta.preprocessor.yahoodownloader import YahooDownloader from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split from finrl import config from finrl import config_tickers from finrl.config import ( DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR, INDICATORS, TRAIN_START_DATE, TRAIN_END_DATE, TEST_START_DATE, TEST_END_DATE, TRADE_START_DATE, TRADE_END_DATE, ) if not os.path.exists("./" + config.RESULTS_DIR): os.makedirs("./" + config.RESULTS_DIR)
InteractiveShell.ast_node_interactivity = "all" pd.options.display.float_format = '{:,.3f}'.format pd.set_option('mode.use_inf_as_na', True) pd.set_option('display.max_columns', 300) pd.set_option('display.max_rows', 500) idx = pd.IndexSlice import warnings warnings.filterwarnings('ignore')

Load Data

df_merged = pd.read_csv('data/merged.csv') df_merged

Weight Initialization

Retail Weights (Rank-based method)

# Compute a set of weights for asset allocation df_merged['moribvol'] = df_merged.groupby(['date'])['moribvol'].rank(method='dense') df_merged['moribvol'] = df_merged.groupby('date')['moribvol'].apply(lambda x: x/x.sum()) df_merged

Mean-Variance Optimization Weights

#TODO: Compute and append mean variance weights to "df_merged" here, say column name as "mean-var"

Data Split

# Columns fed to the environment as the observation state.
states = [
    'date', 'open', 'high', 'low', 'close', 'volume', 'tic', 'day',
    'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30',
    'close_30_sma', 'close_60_sma', 'moribvol',
]

# Train on 2007-2017; trade (out-of-sample) on 2018-2021.
train_data = data_split(df_merged[states], '2007-01-01', '2018-01-01')
trade_data = data_split(df_merged[states], '2018-01-01', '2022-01-01')  # boom bust?

train_data
trade_data

# Persist both splits to the data folder for downstream notebooks.
# TODO: add more features in "state" if needed
train_data.to_csv('data/train_data.csv', index=True)
trade_data.to_csv('data/trade_data.csv', index=True)

Benchmarks

We compare the performance of the different weighting methods over the training period

  • Mean Variance

  • Equally weighted (Buy and hold)

  • Market indexes (NASDAQ and XLK)

  • Individual stocks

Environment configuration

A gym-style portfolio-allocation environment for agents to interact with, which makes it convenient to compare performance across methods.

train = train_data trade = trade_data stock_dimension = len(train.tic.unique()) state_space = stock_dimension tech_indicator_list = ['macd', 'rsi_30', 'cci_30', 'dx_30'] feature_dimension = len(tech_indicator_list) print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}") print(f"Feature Dimension: {feature_dimension}") env_kwargs = { "hmax": 100, "initial_amount": 1000000, "transaction_cost_pct": 0, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": tech_indicator_list, "action_space": stock_dimension, "reward_scaling": 1e-1 } e_train_gym = StockPortfolioEnv.StockPortfolioEnv(df = train, **env_kwargs) e_trade_gym = StockPortfolioEnv.StockPortfolioEnv(df = trade, **env_kwargs)
Stock Dimension: 11, State Space: 11 Feature Dimension: 4

Sampling

retail_train = StockPortfolioEnv.sample_from_env(i=0, env=e_train_gym, weights=train['moribvol'])
================================= begin_total_asset:1000000 end_total_asset:4904419.075973194 Sharpe: 0.6729768484476762 =================================
# TODO: to be changed to excecute like the above function # Mean variance mean_var_df = pd.read_csv('../data/mean_var_weight.csv') mean_var_df = pd.concat([mean_var_df.iloc[[0]], mean_var_df], ignore_index=True) mean_var_df = pd.concat([mean_var_df, mean_var_df.iloc[[len(mean_var_df)-1]]], ignore_index=True) mean_var_df.loc[0, 'date'] = '2007-01-03' mean_var_df.loc[len(mean_var_df)-1, 'date'] = '2017-12-29' mean_var_df = mean_var_df.drop(mean_var_df.columns[0], axis=1) mean_var_dataset = StockPortfolioEnv.sample_from_env(i=0, env=e_train_gym, weights=mean_var_df.values) mean_var_cum_ret = pd.DataFrame({'ret': 1000000 + np.insert(mean_var_dataset['rewards'].cumsum(), 0, 0, axis=0)}) mean_var_cum_ret['ret'] /= 1000000
================================= begin_total_asset:1000000 end_total_asset:2954456.9590735934 Sharpe: 0.58838920208855 =================================
# XLK yf_xlk = YahooDownloader(start_date = start, end_date = end, ticker_list = ["XLK"]).fetch_data() yf_xlk = fe.preprocess_data(yf_xlk) yf_xlk = yf_xlk.copy() yf_xlk = yf_xlk.fillna(0) yf_xlk = yf_xlk.replace(np.inf,0) # yf_xlk['date'] = pd.to_datetime(yf_xlk['date']) yf_xlk = data_split(yf_xlk, '2007-01-01', '2018-01-01') yf_xlk['ret'] = yf_xlk['open'] / yf_xlk['open'].iloc[0]
[*********************100%***********************] 1 of 1 completed Shape of DataFrame: (4074, 8) Successfully added technical indicators Successfully added turbulence index
# individual grouped = df_merged.groupby('tic') selected_cum_ret = grouped.apply(lambda x: x['open']/x['open'].iloc[0]) selected_cum_ret = selected_cum_ret.reset_index() selected_cum_ret = selected_cum_ret.set_index('level_1') selected_cum_ret = selected_cum_ret.join(df_merged[['date']]) selected_cum_ret = data_split(selected_cum_ret, '2007-01-01', '2018-01-01')

Performance Comparison

fig, ax = plt.subplots() # Equal equal_cum_ret = pd.read_csv("./results/equal_cumulative_reward.csv", names=['ret']) equal_cum_ret['date'] = selected_cum_ret['date'].unique() ax = equal_cum_ret.plot(ax=ax, kind='line', x='date', y='ret', label="Equal") # Mean Variance mean_var_cum_ret['date'] = selected_cum_ret['date'].unique() ax = mean_var_cum_ret.plot(ax=ax, kind='line', x='date', y='ret', label="Mean Var") # Retail retail_cum_ret = pd.read_csv("results/retail_cumulative_reward.csv", names=['ret']) retail_cum_ret['date'] = selected_cum_ret['date'].unique() ax = retail_cum_ret.plot(ax=ax, kind='line', x='date', y='ret', label="Retail") # XLK ax = yf_xlk.plot(ax=ax, kind='line', x='date', y='ret', label="XLK") plt.show()
Image in a Jupyter notebook
fig, ax = plt.subplots() # 个股 for key, grp in selected_cum_ret.groupby(['tic']): ax = grp.plot(ax=ax, kind='line', x='date', y='open', label=key) plt.show()
Image in a Jupyter notebook