GitHub Repository: polakowo/vectorbt
Path: blob/master/examples/PortfolioOptimization.ipynb
Kernel: Python 3 (ipykernel)
import os
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime
import pytz
from numba import njit

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import base_optimizer

import vectorbt as vbt
from vectorbt.generic.nb import nanmean_nb
from vectorbt.portfolio.nb import order_nb, sort_call_seq_nb
from vectorbt.portfolio.enums import SizeType, Direction
# Define params
symbols = ['FB', 'AMZN', 'NFLX', 'GOOG', 'AAPL']
start_date = datetime(2017, 1, 1, tzinfo=pytz.utc)
end_date = datetime(2020, 1, 1, tzinfo=pytz.utc)
num_tests = 2000

vbt.settings.array_wrapper['freq'] = 'days'
vbt.settings.returns['year_freq'] = '252 days'
vbt.settings.portfolio['seed'] = 42
vbt.settings.portfolio.stats['incl_unrealized'] = True
yfdata = vbt.YFData.download(symbols, start=start_date, end=end_date)
print(yfdata.symbols)
['FB', 'AMZN', 'NFLX', 'GOOG', 'AAPL']
ohlcv = yfdata.concat()
print(ohlcv.keys())
dict_keys(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits'])
price = ohlcv['Close']
# Plot normalized price series
(price / price.iloc[0]).vbt.plot().show_svg()
[Figure: normalized price series]
returns = price.pct_change()
print(returns.mean())
symbol
FB      0.000918
AMZN    0.001341
NFLX    0.001509
GOOG    0.000812
AAPL    0.001414
dtype: float64
print(returns.std())
symbol
FB      0.018262
AMZN    0.017309
NFLX    0.023354
GOOG    0.014594
AAPL    0.015544
dtype: float64
print(returns.corr())
symbol        FB      AMZN      NFLX      GOOG      AAPL
symbol
FB      1.000000  0.595549  0.464163  0.612674  0.467665
AMZN    0.595549  1.000000  0.614445  0.687693  0.601882
NFLX    0.464163  0.614445  1.000000  0.554071  0.453648
GOOG    0.612674  0.687693  0.554071  1.000000  0.605521
AAPL    0.467665  0.601882  0.453648  0.605521  1.000000
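Combining these per-asset statistics already hints at which assets a Sharpe-based optimizer will favor. A minimal sketch, assuming a zero risk-free rate and the 252 trading days per year configured above:

# Rough annualized Sharpe ratio per asset (zero risk-free rate assumed)
print(returns.mean() / returns.std() * np.sqrt(252))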

One-time allocation

np.random.seed(42)

# Generate random weights, n times
weights = []
for i in range(num_tests):
    w = np.random.random_sample(len(symbols))
    w = w / np.sum(w)
    weights.append(w)

print(len(weights))
2000
# Build column hierarchy such that one weight corresponds to one price series
_price = price.vbt.tile(num_tests, keys=pd.Index(np.arange(num_tests), name='symbol_group'))
_price = _price.vbt.stack_index(pd.Index(np.concatenate(weights), name='weights'))
print(_price.columns)
MultiIndex([( 0.13319702814025883,    0,   'FB'),
            ( 0.33810081711389406,    0, 'AMZN'),
            ( 0.26031768763785473,    0, 'NFLX'),
            (  0.2128998389048247,    0, 'GOOG'),
            ( 0.05548462820316767,    0, 'AAPL'),
            ( 0.06528491964469331,    1,   'FB'),
            ( 0.02430844330237927,    1, 'AMZN'),
            (  0.3625014516740258,    1, 'NFLX'),
            (  0.2515713061862386,    1, 'GOOG'),
            ( 0.29633387919266296,    1, 'AAPL'),
            ...
            (  0.2056564359049325, 1998,   'FB'),
            ( 0.14846396871443943, 1998, 'AMZN'),
            ( 0.21512097636364197, 1998, 'NFLX'),
            (  0.3738566007394396, 1998, 'GOOG'),
            (0.056902018277546554, 1998, 'AAPL'),
            ( 0.25860265182212094, 1999,   'FB'),
            (  0.2706191852849979, 1999, 'AMZN'),
            (  0.2854538191129893, 1999, 'NFLX'),
            ( 0.11985160754099378, 1999, 'GOOG'),
            (  0.0654727362388982, 1999, 'AAPL')],
           names=['weights', 'symbol_group', 'symbol'], length=10000)
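Each symbol_group level now identifies one candidate portfolio of five columns. To inspect a single candidate, standard pandas MultiIndex selection works; a minimal sketch:

# Select the five price columns belonging to candidate portfolio 0
print(_price.xs(0, level='symbol_group', axis=1).shape)  # (754, 5)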
# Define order size
size = np.full_like(_price, np.nan)
size[0, :] = np.concatenate(weights)  # allocate at first timestamp, do nothing afterwards
print(size.shape)
(754, 10000)
# Run simulation
# All weights sum to 1, no shorting, and 100% investment in risky assets
pf = vbt.Portfolio.from_orders(
    close=_price,
    size=size,
    size_type='targetpercent',
    group_by='symbol_group',
    cash_sharing=True
)

print(len(pf.orders))  # one order per asset per group: 2000 groups * 5 assets
10000
# Plot annualized return against volatility, color by Sharpe ratio
annualized_return = pf.annualized_return()
annualized_return.index = pf.annualized_volatility()
annualized_return.vbt.scatterplot(
    trace_kwargs=dict(
        mode='markers',
        marker=dict(
            color=pf.sharpe_ratio(),
            colorbar=dict(
                title='sharpe_ratio'
            ),
            size=5,
            opacity=0.7
        )
    ),
    xaxis_title='annualized_volatility',
    yaxis_title='annualized_return'
).show_svg()
[Figure: annualized return vs. annualized volatility, colored by Sharpe ratio]
# Get index of the best group according to the target metric
best_symbol_group = pf.sharpe_ratio().idxmax()
print(best_symbol_group)
214
# Print best weights
print(weights[best_symbol_group])
[0.18782144 0.14807743 0.0266817 0.01132979 0.62608964]
# Compute default stats
print(pf.iloc[best_symbol_group].stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    243.693256
Total Return [%]                             143.693256
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              33.993899
Max Drawdown Duration                 277 days 00:00:00
Total Trades                                          5
Total Closed Trades                                   0
Total Open Trades                                     5
Open Trade PnL                               143.693256
Win Rate [%]                                        NaN
Best Trade [%]                                      NaN
Worst Trade [%]                                     NaN
Avg Winning Trade [%]                               NaN
Avg Losing Trade [%]                                NaN
Avg Winning Trade Duration                          NaT
Avg Losing Trade Duration                           NaT
Profit Factor                                       NaN
Expectancy                                          NaN
Sharpe Ratio                                   1.441149
Calmar Ratio                                   1.020062
Omega Ratio                                    1.298739
Sortino Ratio                                  2.071869
Name: 214, dtype: object

Rebalance monthly

# Select the first index of each month
rb_mask = ~_price.index.to_period('m').duplicated()
print(rb_mask.sum())
36
/Users/olegpolakow/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py:1146: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.
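The warning is harmless here, since we only use the mask positions, but it can be avoided by dropping the timezone before the period conversion. A minimal sketch:

# Equivalent mask without the timezone warning
rb_mask_alt = ~_price.index.tz_localize(None).to_period('m').duplicated()
assert (rb_mask_alt == rb_mask).all()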
rb_size = np.full_like(_price, np.nan)
rb_size[rb_mask, :] = np.concatenate(weights)  # allocate at mask
print(rb_size.shape)
(754, 10000)
# Run simulation with monthly rebalancing
rb_pf = vbt.Portfolio.from_orders(
    close=_price,
    size=rb_size,
    size_type='targetpercent',
    group_by='symbol_group',
    cash_sharing=True,
    call_seq='auto'  # important: sell before buy
)

print(len(rb_pf.orders))  # ~36 rebalancing dates * 10000 columns
359995
rb_best_symbol_group = rb_pf.sharpe_ratio().idxmax()
print(rb_best_symbol_group)
214
print(weights[rb_best_symbol_group])
[0.18782144 0.14807743 0.0266817 0.01132979 0.62608964]
print(rb_pf.iloc[rb_best_symbol_group].stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    248.651338
Total Return [%]                             148.651338
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              33.258623
Max Drawdown Duration                 256 days 00:00:00
Total Trades                                         92
Total Closed Trades                                  87
Total Open Trades                                     5
Open Trade PnL                               117.477516
Win Rate [%]                                  97.701149
Best Trade [%]                               189.769858
Worst Trade [%]                               -4.766427
Avg Winning Trade [%]                         54.827116
Avg Losing Trade [%]                           -2.86779
Avg Winning Trade Duration    360 days 17:13:24.705882352
Avg Losing Trade Duration              492 days 12:00:00
Profit Factor                                218.080541
Expectancy                                      0.35832
Sharpe Ratio                                    1.47594
Calmar Ratio                                   1.069964
Omega Ratio                                    1.306485
Sortino Ratio                                   2.12855
Name: 214, dtype: object
def plot_allocation(rb_pf):
    # Plot weight development of the portfolio
    rb_asset_value = rb_pf.asset_value(group_by=False)
    rb_value = rb_pf.value()
    rb_idxs = np.flatnonzero((rb_pf.asset_flow() != 0).any(axis=1))
    rb_dates = rb_pf.wrapper.index[rb_idxs]
    fig = (rb_asset_value.vbt / rb_value).vbt.plot(
        trace_names=symbols,
        trace_kwargs=dict(
            stackgroup='one'
        )
    )
    for rb_date in rb_dates:
        fig.add_shape(
            dict(
                xref='x',
                yref='paper',
                x0=rb_date,
                x1=rb_date,
                y0=0,
                y1=1,
                line_color=fig.layout.template.layout.plot_bgcolor
            )
        )
    fig.show_svg()
plot_allocation(rb_pf.iloc[rb_best_symbol_group]) # best group
[Figure: allocation over time for the best group, with rebalancing dates marked]

Search and rebalance every 30 days

Utilize the low-level API to dynamically search for the best Sharpe ratio and rebalance accordingly. In contrast to the previous method, we won't stack columns but will run the search in a loop instead. We will also rebalance every fixed number of days rather than monthly, since months contain varying numbers of trading days.
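To make the schedule concrete: with 754 trading days and a 30-day interval, pre_sim_func_nb below enables 25 rebalancing segments. A minimal sketch of the row indices it activates:

# Row indices activated by segment_mask[30::30] for 754 rows
print(np.arange(754)[30::30])  # [ 30  60 ... 750], i.e. 25 rebalancing days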

srb_sharpe = np.full(price.shape[0], np.nan)

@njit
def pre_sim_func_nb(c, every_nth):
    # Define rebalancing days
    c.segment_mask[:, :] = False
    c.segment_mask[every_nth::every_nth, :] = True
    return ()

@njit
def find_weights_nb(c, price, num_tests):
    # Find optimal weights based on best Sharpe ratio
    returns = (price[1:] - price[:-1]) / price[:-1]
    returns = returns[1:, :]  # cannot compute np.cov with NaN
    mean = nanmean_nb(returns)
    cov = np.cov(returns, rowvar=False)  # masked arrays not supported by Numba (yet)
    best_sharpe_ratio = -np.inf
    weights = np.full(c.group_len, np.nan, dtype=np.float64)
    for i in range(num_tests):
        # Generate weights
        w = np.random.random_sample(c.group_len)
        w = w / np.sum(w)

        # Compute annualized mean, covariance, and Sharpe ratio
        # Note: ann_factor is a global defined in the next cell; Numba resolves it at compile time
        p_return = np.sum(mean * w) * ann_factor
        p_std = np.sqrt(np.dot(w.T, np.dot(cov, w))) * np.sqrt(ann_factor)
        sharpe_ratio = p_return / p_std

        if sharpe_ratio > best_sharpe_ratio:
            best_sharpe_ratio = sharpe_ratio
            weights = w

    return best_sharpe_ratio, weights

@njit
def pre_segment_func_nb(c, find_weights_nb, history_len, ann_factor, num_tests, srb_sharpe):
    if history_len == -1:
        # Look back at the entire time period
        close = c.close[:c.i, c.from_col:c.to_col]
    else:
        # Look back at a fixed time period
        if c.i - history_len <= 0:
            return (np.full(c.group_len, np.nan),)  # insufficient data
        close = c.close[c.i - history_len:c.i, c.from_col:c.to_col]

    # Find optimal weights
    best_sharpe_ratio, weights = find_weights_nb(c, close, num_tests)
    srb_sharpe[c.i] = best_sharpe_ratio

    # Update valuation price and reorder orders
    size_type = SizeType.TargetPercent
    direction = Direction.LongOnly
    order_value_out = np.empty(c.group_len, dtype=np.float64)
    for k in range(c.group_len):
        col = c.from_col + k
        c.last_val_price[col] = c.close[c.i, col]
    sort_call_seq_nb(c, weights, size_type, direction, order_value_out)

    return (weights,)

@njit
def order_func_nb(c, weights):
    col_i = c.call_seq_now[c.call_idx]
    return order_nb(
        weights[col_i],
        c.close[c.i, c.col],
        size_type=SizeType.TargetPercent
    )
ann_factor = returns.vbt.returns.ann_factor  # 252, given freq='days' and year_freq='252 days' set above
# Run simulation using a custom order function
srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(30,),
    pre_segment_func_nb=pre_segment_func_nb,
    pre_segment_args=(find_weights_nb, -1, ann_factor, num_tests, srb_sharpe),
    cash_sharing=True,
    group_by=True
)
# Plot best Sharpe ratio at each rebalancing day
pd.Series(srb_sharpe, index=price.index).vbt.scatterplot(
    trace_kwargs=dict(mode='markers')
).show_svg()
[Figure: best Sharpe ratio found at each rebalancing day]
print(srb_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    182.103898
Total Return [%]                              82.103898
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              34.350877
Max Drawdown Duration                 310 days 00:00:00
Total Trades                                         67
Total Closed Trades                                  62
Total Open Trades                                     5
Open Trade PnL                                33.965423
Win Rate [%]                                  77.419355
Best Trade [%]                                 43.78028
Worst Trade [%]                              -15.059913
Avg Winning Trade [%]                         13.720936
Avg Losing Trade [%]                          -4.410183
Avg Winning Trade Duration            334 days 09:00:00
Avg Losing Trade Duration     439 days 06:51:25.714285712
Profit Factor                                  6.379302
Expectancy                                     0.776427
Sharpe Ratio                                   0.959325
Calmar Ratio                                   0.645715
Omega Ratio                                    1.193184
Sortino Ratio                                  1.366078
Name: group, dtype: object
plot_allocation(srb_pf)
[Figure: allocation over time, full-history lookback]

You can see how the weights stabilize as more data becomes available.

# Run simulation, but now consider only the last 252 days of data
srb252_sharpe = np.full(price.shape[0], np.nan)

srb252_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(30,),
    pre_segment_func_nb=pre_segment_func_nb,
    pre_segment_args=(find_weights_nb, 252, ann_factor, num_tests, srb252_sharpe),
    cash_sharing=True,
    group_by=True
)
pd.Series(srb252_sharpe, index=price.index).vbt.scatterplot(
    trace_kwargs=dict(mode='markers')
).show_svg()
[Figure: best Sharpe ratio found at each rebalancing day, 252-day lookback]
print(srb252_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    138.658042
Total Return [%]                              38.658042
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              33.135977
Max Drawdown Duration                 192 days 00:00:00
Total Trades                                         39
Total Closed Trades                                  34
Total Open Trades                                     5
Open Trade PnL                                10.464979
Win Rate [%]                                  58.823529
Best Trade [%]                                16.361723
Worst Trade [%]                              -15.501637
Avg Winning Trade [%]                          9.395734
Avg Losing Trade [%]                          -4.970106
Avg Winning Trade Duration            300 days 00:00:00
Avg Losing Trade Duration     244 days 06:51:25.714285712
Profit Factor                                  5.487362
Expectancy                                     0.829208
Sharpe Ratio                                   0.581345
Calmar Ratio                                   0.348339
Omega Ratio                                     1.13617
Sortino Ratio                                  0.834951
Name: group, dtype: object
plot_allocation(srb252_pf)
[Figure: allocation over time, 252-day lookback]

Using only a trailing window produces a much more volatile weight distribution.

PyPortfolioOpt + vectorbt

One-time allocation

# Calculate expected returns and sample covariance matrix
avg_returns = expected_returns.mean_historical_return(price)
cov_mat = risk_models.sample_cov(price)

# Get weights maximizing the Sharpe ratio
ef = EfficientFrontier(avg_returns, cov_mat)
weights = ef.max_sharpe()
clean_weights = ef.clean_weights()
pyopt_weights = np.array([clean_weights[symbol] for symbol in symbols])
print(pyopt_weights)
[0. 0.22232 0.06347 0. 0.7142 ]
pyopt_size = np.full_like(price, np.nan)
pyopt_size[0, :] = pyopt_weights  # allocate at first timestamp, do nothing afterwards
print(pyopt_size.shape)
(754, 5)
# Run simulation with weights from PyPortfolioOpt
pyopt_pf = vbt.Portfolio.from_orders(
    close=price,
    size=pyopt_size,
    size_type='targetpercent',
    group_by=True,
    cash_sharing=True
)

print(len(pyopt_pf.orders))  # only the three non-zero weights generate orders
3

This is faster than the stacking solution, but it doesn't let you compare different weight allocations against each other.
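As a quick sanity check, we can still compare this single allocation against the best group found by the random search earlier; a minimal sketch, assuming pf from the one-time allocation section is still in scope:

# Best Sharpe ratio among the 2000 random groups vs. the PyPortfolioOpt allocation
print(pf.sharpe_ratio().max())
print(pyopt_pf.sharpe_ratio())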

print(pyopt_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    259.637702
Total Return [%]                             159.637702
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                        99.999615
Total Fees Paid                                     0.0
Max Drawdown [%]                               35.25499
Max Drawdown Duration                 266 days 00:00:00
Total Trades                                          3
Total Closed Trades                                   0
Total Open Trades                                     3
Open Trade PnL                               159.637702
Win Rate [%]                                        NaN
Best Trade [%]                                      NaN
Worst Trade [%]                                     NaN
Avg Winning Trade [%]                               NaN
Avg Losing Trade [%]                                NaN
Avg Winning Trade Duration                          NaT
Avg Losing Trade Duration                           NaT
Profit Factor                                       NaN
Expectancy                                          NaN
Sharpe Ratio                                   1.495774
Calmar Ratio                                   1.065352
Omega Ratio                                    1.311767
Sortino Ratio                                  2.186792
Name: group, dtype: object

Search and rebalance monthly

You can't use third-party optimization packages within Numba (yet).

Here you have two choices:

  1. Use os.environ['NUMBA_DISABLE_JIT'] = '1' before all imports to disable Numba completely

  2. Disable Numba for the function, but also for every other function in the stack that calls it

We will demonstrate the second option; the first is sketched below for reference.
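A minimal sketch of the first option; the environment variable must be set at the very top of the notebook, before Numba or vectorbt are imported for the first time:

# Option 1 (for reference): disable Numba JIT compilation globally
import os
os.environ['NUMBA_DISABLE_JIT'] = '1'  # must run before importing numba/vectorbt

import vectorbt as vbt  # every @njit function now runs as pure Python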

def pyopt_find_weights(sc, price, num_tests):  # no @njit decorator = it's a pure Python function
    # Calculate expected returns and sample covariance matrix
    price = pd.DataFrame(price, columns=symbols)
    avg_returns = expected_returns.mean_historical_return(price)
    cov_mat = risk_models.sample_cov(price)

    # Get weights maximizing the Sharpe ratio
    ef = EfficientFrontier(avg_returns, cov_mat)
    weights = ef.max_sharpe()
    clean_weights = ef.clean_weights()
    weights = np.array([clean_weights[symbol] for symbol in symbols])
    best_sharpe_ratio = base_optimizer.portfolio_performance(weights, avg_returns, cov_mat)[2]
    return best_sharpe_ratio, weights
pyopt_srb_sharpe = np.full(price.shape[0], np.nan)

# Run simulation with a custom order function
pyopt_srb_pf = vbt.Portfolio.from_order_func(
    price,
    order_func_nb,
    pre_sim_func_nb=pre_sim_func_nb,
    pre_sim_args=(30,),
    pre_segment_func_nb=pre_segment_func_nb.py_func,  # run pre_segment_func_nb as a pure Python function
    pre_segment_args=(pyopt_find_weights, -1, ann_factor, num_tests, pyopt_srb_sharpe),
    cash_sharing=True,
    group_by=True,
    use_numba=False  # run simulate_nb as a pure Python function
)
pd.Series(pyopt_srb_sharpe, index=price.index).vbt.scatterplot(
    trace_kwargs=dict(mode='markers')
).show_svg()
[Figure: best Sharpe ratio found at each rebalancing day, PyPortfolioOpt]
print(pyopt_srb_pf.stats())
Start                         2017-01-03 00:00:00+00:00
End                           2019-12-31 00:00:00+00:00
Period                                754 days 00:00:00
Start Value                                       100.0
End Value                                    166.711167
Total Return [%]                              66.711167
Benchmark Return [%]                         121.870057
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              35.363921
Max Drawdown Duration                 333 days 00:00:00
Total Trades                                         44
Total Closed Trades                                  41
Total Open Trades                                     3
Open Trade PnL                                27.442055
Win Rate [%]                                   78.04878
Best Trade [%]                                35.953901
Worst Trade [%]                              -30.635401
Avg Winning Trade [%]                         12.751618
Avg Losing Trade [%]                          -9.885814
Avg Winning Trade Duration            195 days 22:30:00
Avg Losing Trade Duration             173 days 08:00:00
Profit Factor                                  3.224386
Expectancy                                     0.957783
Sharpe Ratio                                   0.808511
Calmar Ratio                                   0.526731
Omega Ratio                                    1.160979
Sortino Ratio                                  1.148531
Name: group, dtype: object
plot_allocation(pyopt_srb_pf)
[Figure: allocation over time, PyPortfolioOpt]