GitHub Repository: polakowo/vectorbt
Path: blob/master/examples/PortfolioOptimization.ipynb
Kernel: Python 3 (ipykernel)
import os
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime
import pytz
from numba import njit

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import base_optimizer

import vectorbt as vbt
from vectorbt.generic.nb import nanmean_nb
from vectorbt.portfolio.nb import order_nb, sort_call_seq_nb
from vectorbt.portfolio.enums import SizeType, Direction
# Define params
symbols = ['FB', 'AMZN', 'NFLX', 'GOOG', 'AAPL']
start_date = datetime(2017, 1, 1, tzinfo=pytz.utc)
end_date = datetime(2020, 1, 1, tzinfo=pytz.utc)
num_tests = 2000

vbt.settings.array_wrapper['freq'] = 'days'
vbt.settings.returns['year_freq'] = '252 days'
vbt.settings.portfolio['seed'] = 42
vbt.settings.portfolio.stats['incl_unrealized'] = True
yfdata = vbt.YFData.download(symbols, start=start_date, end=end_date)
print(yfdata.symbols)
['FB', 'AMZN', 'NFLX', 'GOOG', 'AAPL']
ohlcv = yfdata.concat()
print(ohlcv.keys())
dict_keys(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits'])
price = ohlcv['Close']
# Plot normalized price series
(price / price.iloc[0]).vbt.plot().show_svg()
[Figure: normalized price series]
returns = price.pct_change()
print(returns.mean())
symbol
FB      0.000918
AMZN    0.001341
NFLX    0.001509
GOOG    0.000812
AAPL    0.001414
dtype: float64
print(returns.std())
symbol
FB      0.018262
AMZN    0.017309
NFLX    0.023354
GOOG    0.014594
AAPL    0.015544
dtype: float64
print(returns.corr())
symbol        FB      AMZN      NFLX      GOOG      AAPL
symbol
FB      1.000000  0.595549  0.464163  0.612674  0.467665
AMZN    0.595549  1.000000  0.614445  0.687693  0.601882
NFLX    0.464163  0.614445  1.000000  0.554071  0.453648
GOOG    0.612674  0.687693  0.554071  1.000000  0.605521
AAPL    0.467665  0.601882  0.453648  0.605521  1.000000
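Combining these per-asset statistics already hints at which assets a Sharpe-based optimizer will favor. A minimal sketch, assuming a zero risk-free rate and the 252 trading days per year configured above:

# Rough annualized Sharpe ratio per asset (zero risk-free rate assumed)
print(returns.mean() / returns.std() * np.sqrt(252))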

One-time allocation

np.random.seed(42)

# Generate random weights, n times
weights = []
for i in range(num_tests):
    w = np.random.random_sample(len(symbols))
    w = w / np.sum(w)
    weights.append(w)

print(len(weights))
2000
# Build column hierarchy such that one weight corresponds to one price series
_price = price.vbt.tile(num_tests, keys=pd.Index(np.arange(num_tests), name='symbol_group'))
_price = _price.vbt.stack_index(pd.Index(np.concatenate(weights), name='weights'))
print(_price.columns)
MultiIndex([( 0.13319702814025883,    0,   'FB'),
            ( 0.33810081711389406,    0, 'AMZN'),
            ( 0.26031768763785473,    0, 'NFLX'),
            (  0.2128998389048247,    0, 'GOOG'),
            ( 0.05548462820316767,    0, 'AAPL'),
            ( 0.06528491964469331,    1,   'FB'),
            ( 0.02430844330237927,    1, 'AMZN'),
            (  0.3625014516740258,    1, 'NFLX'),
            (  0.2515713061862386,    1, 'GOOG'),
            ( 0.29633387919266296,    1, 'AAPL'),
            ...
            (  0.2056564359049325, 1998,   'FB'),
            ( 0.14846396871443943, 1998, 'AMZN'),
            ( 0.21512097636364197, 1998, 'NFLX'),
            (  0.3738566007394396, 1998, 'GOOG'),
            (0.056902018277546554, 1998, 'AAPL'),
            ( 0.25860265182212094, 1999,   'FB'),
            (  0.2706191852849979, 1999, 'AMZN'),
            (  0.2854538191129893, 1999, 'NFLX'),
            ( 0.11985160754099378, 1999, 'GOOG'),
            (  0.0654727362388982, 1999, 'AAPL')],
           names=['weights', 'symbol_group', 'symbol'], length=10000)
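Each symbol_group level now identifies one candidate portfolio of five columns. To inspect a single candidate, standard pandas MultiIndex selection works; a minimal sketch:

# Select the five price columns belonging to candidate portfolio 0
print(_price.xs(0, level='symbol_group', axis=1).shape)  # (754, 5)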
# Define order size
size = np.full_like(_price, np.nan)
size[0, :] = np.concatenate(weights)  # allocate at first timestamp, do nothing afterwards
print(size.shape)
(754, 10000)
# Run simulation
# All weights sum to 1, no shorting, and 100% investment in risky assets
pf = vbt.Portfolio.from_orders(
    close=_price,
    size=size,
    size_type='targetpercent',
    group_by='symbol_group',
    cash_sharing=True
)

print(len(pf.orders))  # one order per asset per group: 2000 groups * 5 assets
10000
# Plot annualized return against volatility, color by Sharpe ratio
annualized_return = pf.annualized_return()
annualized_return.index = pf.annualized_volatility()
annualized_return.vbt.scatterplot(
    trace_kwargs=dict(
        mode='markers',
        marker=dict(
            color=pf.sharpe_ratio(),
            colorbar=dict(
                title='sharpe_ratio'
            ),
            size=5,
            opacity=0.7
        )
    ),
    xaxis_title='annualized_volatility',
    yaxis_title='annualized_return'
).show_svg()
[Figure: annualized return vs. annualized volatility, colored by Sharpe ratio]
# Get index of the best group according to the target metric
best_symbol_group = pf.sharpe_ratio().idxmax()
print(best_symbol_group)
214
# Print best weights
print(weights[best_symbol_group])
[0.18782144 0.14807743 0.0266817 0.01132979 0.62608964]
# Compute default stats
print(pf.iloc[best_symbol_group].stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    243.693256
Total Return [%]                             143.693256
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              33.993899
Max Drawdown Duration                 277 days 00:00:00
Total Trades                                          5
Total Closed Trades                                   0
Total Open Trades                                     5
Open Trade PnL                               143.693256
Win Rate [%]                                        NaN
Best Trade [%]                                      NaN
Worst Trade [%]                                     NaN
Avg Winning Trade [%]                               NaN
Avg Losing Trade [%]                                NaN
Avg Winning Trade Duration                          NaT
Avg Losing Trade Duration                           NaT
Profit Factor                                       NaN
Expectancy                                          NaN
Sharpe Ratio                                   1.441149
Calmar Ratio                                   1.020062
Omega Ratio                                    1.298739
Sortino Ratio                                  2.071869
Name: 214, dtype: object

Rebalance monthly

# Select the first index of each month
rb_mask = ~_price.index.to_period('m').duplicated()
print(rb_mask.sum())
36
/Users/olegpolakow/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py:1146: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.
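The warning is harmless here, since we only use the mask positions, but it can be avoided by dropping the timezone before the period conversion. A minimal sketch:

# Equivalent mask without the timezone warning
rb_mask_alt = ~_price.index.tz_localize(None).to_period('m').duplicated()
assert (rb_mask_alt == rb_mask).all()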
rb_size = np.full_like(_price, np.nan)
rb_size[rb_mask, :] = np.concatenate(weights)  # allocate at mask
print(rb_size.shape)
(754, 10000)
# Run simulation with monthly rebalancing
rb_pf = vbt.Portfolio.from_orders(
    close=_price,
    size=rb_size,
    size_type='targetpercent',
    group_by='symbol_group',
    cash_sharing=True,
    call_seq='auto'  # important: sell before buy
)

print(len(rb_pf.orders))  # ~36 rebalancing dates * 10000 columns
359995
rb_best_symbol_group = rb_pf.sharpe_ratio().idxmax()
print(rb_best_symbol_group)
214
print(weights[rb_best_symbol_group])
[0.18782144 0.14807743 0.0266817 0.01132979 0.62608964]
print(rb_pf.iloc[rb_best_symbol_group].stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    248.651338
Total Return [%]                             148.651338
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              33.258623
Max Drawdown Duration                 256 days 00:00:00
Total Trades                                         92
Total Closed Trades                                  87
Total Open Trades                                     5
Open Trade PnL                               117.477516
Win Rate [%]                                  97.701149
Best Trade [%]                               189.769858
Worst Trade [%]                               -4.766427
Avg Winning Trade [%]                         54.827116
Avg Losing Trade [%]                           -2.86779
Avg Winning Trade Duration    360 days 17:13:24.705882352
Avg Losing Trade Duration              492 days 12:00:00
Profit Factor                                218.080541
Expectancy                                      0.35832
Sharpe Ratio                                    1.47594
Calmar Ratio                                   1.069964
Omega Ratio                                    1.306485
Sortino Ratio                                   2.12855
Name: 214, dtype: object
def plot_allocation(rb_pf):
    # Plot weight development of the portfolio
    rb_asset_value = rb_pf.asset_value(group_by=False)
    rb_value = rb_pf.value()
    rb_idxs = np.flatnonzero((rb_pf.asset_flow() != 0).any(axis=1))
    rb_dates = rb_pf.wrapper.index[rb_idxs]
    fig = (rb_asset_value.vbt / rb_value).vbt.plot(
        trace_names=symbols,
        trace_kwargs=dict(
            stackgroup='one'
        )
    )
    for rb_date in rb_dates:
        fig.add_shape(
            dict(
                xref='x',
                yref='paper',
                x0=rb_date,
                x1=rb_date,
                y0=0,
                y1=1,
                line_color=fig.layout.template.layout.plot_bgcolor
            )
        )
    fig.show_svg()
plot_allocation(rb_pf.iloc[rb_best_symbol_group]) # best group
[Figure: allocation over time for the best group, with rebalancing dates marked]

Search and rebalance every 30 days

Utilize the low-level API to dynamically search for the best Sharpe ratio and rebalance accordingly. In contrast to the previous method, we won't stack columns but will run the search in a loop instead. We will also rebalance every fixed number of days rather than monthly, since months contain varying numbers of trading days.
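To make the schedule concrete: with 754 trading days and a 30-day interval, pre_sim_func_nb below enables 25 rebalancing segments. A minimal sketch of the row indices it activates:

# Row indices activated by segment_mask[30::30] for 754 rows
print(np.arange(754)[30::30])  # [ 30  60 ... 750], i.e. 25 rebalancing days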

srb_sharpe = np.full(price.shape[0], np.nan)

@njit
def pre_sim_func_nb(c, every_nth):
    # Define rebalancing days
    c.segment_mask[:, :] = False
    c.segment_mask[every_nth::every_nth, :] = True
    return ()

@njit
def find_weights_nb(c, price, num_tests):
    # Find optimal weights based on best Sharpe ratio
    returns = (price[1:] - price[:-1]) / price[:-1]
    returns = returns[1:, :]  # cannot compute np.cov with NaN
    mean = nanmean_nb(returns)
    cov = np.cov(returns, rowvar=False)  # masked arrays not supported by Numba (yet)
    best_sharpe_ratio = -np.inf
    weights = np.full(c.group_len, np.nan, dtype=np.float64)
    for i in range(num_tests):
        # Generate weights
        w = np.random.random_sample(c.group_len)
        w = w / np.sum(w)

        # Compute annualized mean, covariance, and Sharpe ratio
        # Note: ann_factor is a global defined in the next cell; Numba resolves it at compile time
        p_return = np.sum(mean * w) * ann_factor
        p_std = np.sqrt(np.dot(w.T, np.dot(cov, w))) * np.sqrt(ann_factor)
        sharpe_ratio = p_return / p_std

        if sharpe_ratio > best_sharpe_ratio:
            best_sharpe_ratio = sharpe_ratio
            weights = w

    return best_sharpe_ratio, weights

@njit
def pre_segment_func_nb(c, find_weights_nb, history_len, ann_factor, num_tests, srb_sharpe):
    if history_len == -1:
        # Look back at the entire time period
        close = c.close[:c.i, c.from_col:c.to_col]
    else:
        # Look back at a fixed time period
        if c.i - history_len <= 0:
            return (np.full(c.group_len, np.nan),)  # insufficient data
        close = c.close[c.i - history_len:c.i, c.from_col:c.to_col]

    # Find optimal weights
    best_sharpe_ratio, weights = find_weights_nb(c, close, num_tests)
    srb_sharpe[c.i] = best_sharpe_ratio

    # Update valuation price and reorder orders
    size_type = SizeType.TargetPercent
    direction = Direction.LongOnly
    order_value_out = np.empty(c.group_len, dtype=np.float64)
    for k in range(c.group_len):
        col = c.from_col + k
        c.last_val_price[col] = c.close[c.i, col]
    sort_call_seq_nb(c, weights, size_type, direction, order_value_out)

    return (weights,)

@njit
def order_func_nb(c, weights):
    col_i = c.call_seq_now[c.call_idx]
    return order_nb(
        weights[col_i],
        c.close[c.i, c.col],
        size_type=SizeType.TargetPercent
    )
ann_factor = returns.vbt.returns.ann_factor  # 252, given freq='days' and year_freq='252 days' set above
# Run simulation using a custom order function
srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(30,),
    pre_segment_func_nb=pre_segment_func_nb,
    pre_segment_args=(find_weights_nb, -1, ann_factor, num_tests, srb_sharpe),
    cash_sharing=True,
    group_by=True
)
# Plot best Sharpe ratio at each rebalancing day
pd.Series(srb_sharpe, index=price.index).vbt.scatterplot(
    trace_kwargs=dict(mode='markers')
).show_svg()
[Figure: best Sharpe ratio found at each rebalancing day]
print(srb_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    182.103898
Total Return [%]                              82.103898
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              34.350877
Max Drawdown Duration                 310 days 00:00:00
Total Trades                                         67
Total Closed Trades                                  62
Total Open Trades                                     5
Open Trade PnL                                33.965423
Win Rate [%]                                  77.419355
Best Trade [%]                                 43.78028
Worst Trade [%]                              -15.059913
Avg Winning Trade [%]                         13.720936
Avg Losing Trade [%]                          -4.410183
Avg Winning Trade Duration            334 days 09:00:00
Avg Losing Trade Duration     439 days 06:51:25.714285712
Profit Factor                                  6.379302
Expectancy                                     0.776427
Sharpe Ratio                                   0.959325
Calmar Ratio                                   0.645715
Omega Ratio                                    1.193184
Sortino Ratio                                  1.366078
Name: group, dtype: object
plot_allocation(srb_pf)
[Figure: allocation over time, full-history lookback]

You can see how the weights stabilize as more data becomes available.

# Run simulation, but now consider only the last 252 days of data
srb252_sharpe = np.full(price.shape[0], np.nan)

srb252_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(30,),
    pre_segment_func_nb=pre_segment_func_nb,
    pre_segment_args=(find_weights_nb, 252, ann_factor, num_tests, srb252_sharpe),
    cash_sharing=True,
    group_by=True
)
pd.Series(srb252_sharpe, index=price.index).vbt.scatterplot(
    trace_kwargs=dict(mode='markers')
).show_svg()
[Figure: best Sharpe ratio found at each rebalancing day, 252-day lookback]
print(srb252_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    138.658042
Total Return [%]                              38.658042
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              33.135977
Max Drawdown Duration                 192 days 00:00:00
Total Trades                                         39
Total Closed Trades                                  34
Total Open Trades                                     5
Open Trade PnL                                10.464979
Win Rate [%]                                  58.823529
Best Trade [%]                                16.361723
Worst Trade [%]                              -15.501637
Avg Winning Trade [%]                          9.395734
Avg Losing Trade [%]                          -4.970106
Avg Winning Trade Duration            300 days 00:00:00
Avg Losing Trade Duration     244 days 06:51:25.714285712
Profit Factor                                  5.487362
Expectancy                                     0.829208
Sharpe Ratio                                   0.581345
Calmar Ratio                                   0.348339
Omega Ratio                                     1.13617
Sortino Ratio                                  0.834951
Name: group, dtype: object
plot_allocation(srb252_pf)
[Figure: allocation over time, 252-day lookback]

Using only a trailing window produces a much more volatile weight distribution.

PyPortfolioOpt + vectorbt

One-time allocation

# Calculate expected returns and sample covariance matrix
avg_returns = expected_returns.mean_historical_return(price)
cov_mat = risk_models.sample_cov(price)

# Get weights maximizing the Sharpe ratio
ef = EfficientFrontier(avg_returns, cov_mat)
weights = ef.max_sharpe()
clean_weights = ef.clean_weights()
pyopt_weights = np.array([clean_weights[symbol] for symbol in symbols])
print(pyopt_weights)
[0. 0.22232 0.06347 0. 0.7142 ]
pyopt_size = np.full_like(price, np.nan)
pyopt_size[0, :] = pyopt_weights  # allocate at first timestamp, do nothing afterwards
print(pyopt_size.shape)
(754, 5)
# Run simulation with weights from PyPortfolioOpt
pyopt_pf = vbt.Portfolio.from_orders(
    close=price,
    size=pyopt_size,
    size_type='targetpercent',
    group_by=True,
    cash_sharing=True
)

print(len(pyopt_pf.orders))  # only the three non-zero weights generate orders
3

This is faster than the stacking solution, but it doesn't let you compare different weight allocations against each other.
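As a quick sanity check, we can still compare this single allocation against the best group found by the random search earlier; a minimal sketch, assuming pf from the one-time allocation section is still in scope:

# Best Sharpe ratio among the 2000 random groups vs. the PyPortfolioOpt allocation
print(pf.sharpe_ratio().max())
print(pyopt_pf.sharpe_ratio())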

print(pyopt_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    259.637702
Total Return [%]                             159.637702
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                        99.999615
Total Fees Paid                                     0.0
Max Drawdown [%]                               35.25499
Max Drawdown Duration                 266 days 00:00:00
Total Trades                                          3
Total Closed Trades                                   0
Total Open Trades                                     3
Open Trade PnL                               159.637702
Win Rate [%]                                        NaN
Best Trade [%]                                      NaN
Worst Trade [%]                                     NaN
Avg Winning Trade [%]                               NaN
Avg Losing Trade [%]                                NaN
Avg Winning Trade Duration                          NaT
Avg Losing Trade Duration                           NaT
Profit Factor                                       NaN
Expectancy                                          NaN
Sharpe Ratio                                   1.495774
Calmar Ratio                                   1.065352
Omega Ratio                                    1.311767
Sortino Ratio                                  2.186792
Name: group, dtype: object

Search and rebalance monthly

You can't use third-party optimization packages within Numba (yet).

Here you have two choices:

  1. Use os.environ['NUMBA_DISABLE_JIT'] = '1' before all imports to disable Numba completely

  2. Disable Numba for the function, but also for every other function in the stack that calls it

We will demonstrate the second option; the first is sketched below for reference.
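A minimal sketch of the first option; the environment variable must be set at the very top of the notebook, before Numba or vectorbt are imported for the first time:

# Option 1 (for reference): disable Numba JIT compilation globally
import os
os.environ['NUMBA_DISABLE_JIT'] = '1'  # must run before importing numba/vectorbt

import vectorbt as vbt  # every @njit function now runs as pure Python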

def pyopt_find_weights(sc, price, num_tests):  # no @njit decorator = it's a pure Python function
    # Calculate expected returns and sample covariance matrix
    price = pd.DataFrame(price, columns=symbols)
    avg_returns = expected_returns.mean_historical_return(price)
    cov_mat = risk_models.sample_cov(price)

    # Get weights maximizing the Sharpe ratio
    ef = EfficientFrontier(avg_returns, cov_mat)
    weights = ef.max_sharpe()
    clean_weights = ef.clean_weights()
    weights = np.array([clean_weights[symbol] for symbol in symbols])
    best_sharpe_ratio = base_optimizer.portfolio_performance(weights, avg_returns, cov_mat)[2]
    return best_sharpe_ratio, weights
pyopt_srb_sharpe = np.full(price.shape[0], np.nan)

# Run simulation with a custom order function
pyopt_srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(30,),
    pre_segment_func_nb=pre_segment_func_nb.py_func,  # run pre_segment_func_nb as a pure Python function
    pre_segment_args=(pyopt_find_weights, -1, ann_factor, num_tests, pyopt_srb_sharpe),
    cash_sharing=True,
    group_by=True,
    use_numba=False  # run simulate_nb as a pure Python function
)
pd.Series(pyopt_srb_sharpe, index=price.index).vbt.scatterplot(
    trace_kwargs=dict(mode='markers')
).show_svg()
[Figure: best Sharpe ratio found at each rebalancing day, PyPortfolioOpt]
print(pyopt_srb_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    166.711167
Total Return [%]                              66.711167
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              35.363921
Max Drawdown Duration                 333 days 00:00:00
Total Trades                                         44
Total Closed Trades                                  41
Total Open Trades                                     3
Open Trade PnL                                27.442055
Win Rate [%]                                   78.04878
Best Trade [%]                                35.953901
Worst Trade [%]                              -30.635401
Avg Winning Trade [%]                         12.751618
Avg Losing Trade [%]                          -9.885814
Avg Winning Trade Duration            195 days 22:30:00
Avg Losing Trade Duration             173 days 08:00:00
Profit Factor                                  3.224386
Expectancy                                     0.957783
Sharpe Ratio                                   0.808511
Calmar Ratio                                   0.526731
Omega Ratio                                    1.160979
Sortino Ratio                                  1.148531
Name: group, dtype: object
plot_allocation(pyopt_srb_pf)
[Figure: allocation over time, PyPortfolioOpt]