Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
polakowo
GitHub Repository: polakowo/vectorbt
Path: blob/master/examples/PairsTrading.ipynb
1071 views
Kernel: Python 3 (ipykernel)

In this example, we will compare the development of a pairs trading strategy using backtrader and vectorbt.

import numpy as np import pandas as pd import datetime import collections import math import pytz
import scipy.stats as st

# Pair of tickers to trade
SYMBOL1 = 'PEP'
SYMBOL2 = 'KO'

# Backtest window, timezone-aware (UTC)
FROMDATE = datetime.datetime(2017, 1, 1, tzinfo=pytz.utc)
TODATE = datetime.datetime(2019, 1, 1, tzinfo=pytz.utc)

PERIOD = 100  # window length handed to the rolling indicators
CASH = 100000  # starting cash handed to the broker / portfolio
COMMPERC = 0.005  # 0.5%

# Target percentage per leg of the pair
ORDER_PCT1 = 0.1
ORDER_PCT2 = 0.1

# Symmetric z-score bounds: the two-sided 5% quantiles of the standard normal
UPPER = st.norm.ppf(1 - 0.05 / 2)
LOWER = -UPPER

MODE = 'OLS'  # OLS, log_return

Data

import vectorbt as vbt

# Bounds of the download window, tagged as UTC
start_date = FROMDATE.replace(tzinfo=pytz.utc)
end_date = TODATE.replace(tzinfo=pytz.utc)

# Pull both symbols in a single call
data = vbt.YFData.download(
    [SYMBOL1, SYMBOL2],
    start=start_date,
    end=end_date
)

# Keep only rows inside the half-open interval [start_date, end_date)
data = data.loc[
    (data.wrapper.index >= start_date)
    & (data.wrapper.index < end_date)
]

# Preview the first and the last row of each symbol
print(data.data[SYMBOL1].iloc[[0, -1]])
print(data.data[SYMBOL2].iloc[[0, -1]])
Open High Low Close \ Date 2017-01-03 00:00:00+00:00 91.831129 91.962386 91.192316 91.577354 2018-12-31 00:00:00+00:00 102.775161 103.249160 101.604091 102.682220 Volume Dividends Stock Splits Date 2017-01-03 00:00:00+00:00 3741200 0.0 0 2018-12-31 00:00:00+00:00 5019100 0.0 0 Open High Low Close \ Date 2017-01-03 00:00:00+00:00 35.801111 36.068542 35.611321 36.059914 2018-12-31 00:00:00+00:00 43.810820 43.856946 43.321878 43.681664 Volume Dividends Stock Splits Date 2017-01-03 00:00:00+00:00 14711000 0.0 0 2018-12-31 00:00:00+00:00 10576300 0.0 0

backtrader

import backtrader as bt
import backtrader.feeds as btfeeds
import backtrader.indicators as btind


class CommInfoFloat(bt.CommInfoBase):
    """Commission schema that keeps size as float."""

    params = (
        ('stocklike', True),
        ('commtype', bt.CommInfoBase.COMM_PERC),
        ('percabs', True),
    )

    def getsize(self, price, cash):
        """Return the (fractional) size that `cash` buys at `price`."""
        if not self._stocklike:
            return self.p.leverage * (cash / self.get_margin(price))
        return self.p.leverage * (cash / price)


class OLSSlopeIntercept(btind.PeriodN):
    """Calculates a linear regression using OLS."""

    _mindatas = 2  # ensure at least 2 data feeds are passed
    # `pd` and `sm` used in `next` are declared here rather than imported in this cell
    packages = (
        ('pandas', 'pd'),
        ('statsmodels.api', 'sm'),
    )
    lines = ('slope', 'intercept',)
    params = (
        ('period', 10),
    )

    def next(self):
        """Regress the last `period` values of data0 on data1 (plus a constant)."""
        p0 = pd.Series(self.data0.get(size=self.p.period))
        p1 = pd.Series(self.data1.get(size=self.p.period))
        p1 = sm.add_constant(p1)
        intercept, slope = sm.OLS(p0, p1).fit().params

        self.lines.slope[0] = slope
        self.lines.intercept[0] = intercept


class Log(btind.Indicator):
    """Calculates log."""

    lines = ('log',)

    def next(self):
        self.l.log[0] = math.log(self.data[0])


class OLSSpread(btind.PeriodN):
    """Calculates the z-score of the OLS spread."""

    _mindatas = 2  # ensure at least 2 data feeds are passed
    lines = ('spread', 'spread_mean', 'spread_std', 'zscore',)
    params = (('period', 10),)

    def __init__(self):
        data0_log = Log(self.data0)
        data1_log = Log(self.data1)
        slint = OLSSlopeIntercept(data0_log, data1_log, period=self.p.period)

        # Spread = residual of log(data0) against the fitted line on log(data1)
        spread = data0_log - (slint.slope * data1_log + slint.intercept)
        self.l.spread = spread

        self.l.spread_mean = bt.ind.SMA(spread, period=self.p.period)
        self.l.spread_std = bt.ind.StdDev(spread, period=self.p.period)
        self.l.zscore = (spread - self.l.spread_mean) / self.l.spread_std


class LogReturns(btind.PeriodN):
    """Calculates the log returns."""

    lines = ('logret',)
    params = (('period', 1),)

    def __init__(self):
        # One extra bar is needed to look `period` bars back
        self.addminperiod(self.p.period + 1)

    def next(self):
        self.l.logret[0] = math.log(self.data[0] / self.data[-self.p.period])


class LogReturnSpread(btind.PeriodN):
    """Calculates the spread of the log returns."""

    _mindatas = 2  # ensure at least 2 data feeds are passed
    lines = ('logret0', 'logret1', 'spread', 'spread_mean', 'spread_std', 'zscore',)
    params = (('period', 10),)

    def __init__(self):
        self.l.logret0 = LogReturns(self.data0, period=1)
        self.l.logret1 = LogReturns(self.data1, period=1)
        self.l.spread = self.l.logret0 - self.l.logret1
        self.l.spread_mean = bt.ind.SMA(self.l.spread, period=self.p.period)
        self.l.spread_std = bt.ind.StdDev(self.l.spread, period=self.p.period)
        self.l.zscore = (self.l.spread - self.l.spread_mean) / self.l.spread_std


class PairTradingStrategy(bt.Strategy):
    """Basic pair trading strategy.

    Shorts the spread (sell data0, buy data1) when the z-score rises above
    `upper` and longs it (sell data1, buy data0) when the z-score falls below
    `lower`. `status` remembers the last side taken (0 = none, 1 = short,
    2 = long) so the same signal is not acted upon twice in a row.
    """

    params = dict(
        period=PERIOD,
        order_pct1=ORDER_PCT1,
        order_pct2=ORDER_PCT2,
        printout=True,
        upper=UPPER,
        lower=LOWER,
        mode=MODE
    )

    def log(self, txt, dt=None):
        """Print `txt` prefixed with `dt` (current bar if omitted) when printout is on."""
        if self.p.printout:
            dt = dt or self.data.datetime[0]
            dt = bt.num2date(dt)
            print('%s, %s' % (dt.isoformat(), txt))

    def notify_order(self, order):
        """Log completed and failed orders."""
        if order.status in [bt.Order.Submitted, bt.Order.Accepted]:
            return  # Await further notifications

        if order.status == order.Completed:
            # Buy and sell fills share one message format; only the side differs
            side = 'BUY' if order.isbuy() else 'SELL'
            filltxt = '{} COMPLETE {}, size = {:.2f}, price = {:.2f}'.format(
                side, order.data._name, order.executed.size, order.executed.price)
            self.log(filltxt, order.executed.dt)
        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            # Simply log
            self.log('%s ,' % order.Status[order.status])

        # Allow new orders
        self.orderid = None

    def __init__(self):
        # To control operation entries
        self.orderid = None
        self.order_pct1 = self.p.order_pct1
        self.order_pct2 = self.p.order_pct2
        self.upper = self.p.upper
        self.lower = self.p.lower
        self.status = 0

        # Pick the indicator that produces the spread and its z-score
        if self.p.mode == 'log_return':
            self.transform = LogReturnSpread(self.data0, self.data1, period=self.p.period)
        elif self.p.mode == 'OLS':
            self.transform = OLSSpread(self.data0, self.data1, period=self.p.period)
        else:
            raise ValueError("Unknown mode")
        self.spread = self.transform.spread
        self.zscore = self.transform.zscore

        # For tracking
        self.spread_sr = pd.Series(dtype=float, name='spread')
        self.zscore_sr = pd.Series(dtype=float, name='zscore')
        self.short_signal_sr = pd.Series(dtype=bool, name='short_signals')
        self.long_signal_sr = pd.Series(dtype=bool, name='long_signals')

    def _trade_pair(self, sell_data, sell_pct, buy_data, buy_pct):
        """Log and place the sell leg first, then the buy leg."""
        self.log('SELL CREATE {}, price = {:.2f}, target pct = {:.2%}'.format(
            sell_data._name, sell_data.close[0], -sell_pct))
        self.order_target_percent(data=sell_data, target=-sell_pct)

        self.log('BUY CREATE {}, price = {:.2f}, target pct = {:.2%}'.format(
            buy_data._name, buy_data.close[0], buy_pct))
        self.order_target_percent(data=buy_data, target=buy_pct)

    def next(self):
        """Record spread/z-score for this bar and trade on a bound crossing."""
        if self.orderid:
            return  # if an order is active, no new orders are allowed

        # Read the bar timestamp and z-score once instead of on every use
        now = self.data0.datetime.datetime()
        zscore = self.zscore[0]

        self.spread_sr[now] = self.spread[0]
        self.zscore_sr[now] = zscore
        self.short_signal_sr[now] = False
        self.long_signal_sr[now] = False

        if zscore > self.upper and self.status != 1:
            # Short the spread: sell data0, buy data1
            self.short_signal_sr[now] = True
            self._trade_pair(self.data0, self.order_pct1, self.data1, self.order_pct2)
            self.status = 1

        elif zscore < self.lower and self.status != 2:
            # Long the spread: sell data1, buy data0
            self.long_signal_sr[now] = True
            self._trade_pair(self.data1, self.order_pct2, self.data0, self.order_pct1)
            self.status = 2

    def stop(self):
        """Print starting and ending portfolio value when printout is on."""
        if self.p.printout:
            print('==================================================')
            print('Starting Value - %.2f' % self.broker.startingcash)
            print('Ending Value - %.2f' % self.broker.getvalue())
            print('==================================================')
class DataAnalyzer(bt.analyzers.Analyzer):
    """Analyzer to extract OHLCV."""

    def create_analysis(self):
        self.rets0 = {}
        self.rets1 = {}

    def next(self):
        # One [open, high, low, close, volume] row per feed, keyed by bar datetime
        now = self.strategy.datetime.datetime()
        for feed, store in ((self.data0, self.rets0), (self.data1, self.rets1)):
            store[now] = [
                feed.open[0],
                feed.high[0],
                feed.low[0],
                feed.close[0],
                feed.volume[0]
            ]

    def get_analysis(self):
        return self.rets0, self.rets1


class CashValueAnalyzer(bt.analyzers.Analyzer):
    """Analyzer to extract cash and value."""

    def create_analysis(self):
        self.rets = {}

    def notify_cashvalue(self, cash, value):
        now = self.strategy.datetime.datetime()
        self.rets[now] = (cash, value)

    def get_analysis(self):
        return self.rets


class OrderAnalyzer(bt.analyzers.Analyzer):
    """Analyzer to extract order price, size, value, and paid commission."""

    def create_analysis(self):
        self.rets0 = {}
        self.rets1 = {}

    def notify_order(self, order):
        # Only completed orders are recorded
        if order.status != order.Completed:
            return

        # Orders of SYMBOL1 go to the first store, everything else to the second
        store = self.rets0 if order.data._name == SYMBOL1 else self.rets1
        executed = order.executed
        store[self.strategy.datetime.datetime()] = (
            executed.price,
            executed.size,
            -executed.size * executed.price,
            executed.comm
        )

    def get_analysis(self):
        return self.rets0, self.rets1
def prepare_cerebro(data0, data1, use_analyzers=True, **params):
    """Build a Cerebro engine for the pair trading strategy.

    Both data feeds are added in order, the strategy receives `**params`,
    the broker is given `CASH` and the `COMMPERC` float-size commission
    scheme, and the three analyzers are attached unless `use_analyzers`
    is false. The engine is returned without being run.
    """
    engine = bt.Cerebro()

    # Feeds: first data0, then data1
    for feed in (data0, data1):
        engine.adddata(feed)

    engine.addstrategy(PairTradingStrategy, **params)

    # Set the starting cash
    engine.broker.setcash(CASH)

    # Add the commission scheme
    engine.broker.addcommissioninfo(CommInfoFloat(commission=COMMPERC))

    if use_analyzers:
        for analyzer_cls in (DataAnalyzer, CashValueAnalyzer, OrderAnalyzer):
            engine.addanalyzer(analyzer_cls)

    return engine
class PandasData(btfeeds.PandasData):
    """Pandas feed mapping the capitalized OHLCV column names of the downloaded frames."""

    # Accepted values for `datetime` (which must always be present):
    #   None   : the datetime is the index of the DataFrame
    #   -1     : autodetect the position or a case-wise equal name
    #   >= 0   : numeric index of the column in the DataFrame
    #   string : name of the column in the DataFrame
    params = (
        ('datetime', None),
        ('open', 'Open'),
        ('high', 'High'),
        ('low', 'Low'),
        ('close', 'Close'),
        ('volume', 'Volume'),
        ('openinterest', None),
    )
# Wrap the downloaded frame of each symbol into a backtrader feed
data0, data1 = (
    PandasData(dataname=data.data[symbol], name=symbol)
    for symbol in (SYMBOL1, SYMBOL2)
)

# Build the engine, run it, and keep the first (only) strategy instance
cerebro = prepare_cerebro(data0, data1)
bt_strategy = cerebro.run()[0]
/Users/olegpolakow/miniconda3/lib/python3.7/site-packages/statsmodels/tsa/tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only x = pd.concat(x[::order], 1)
2017-11-14T00:00:00, SELL CREATE PEP, price = 103.41, target pct = -10.00% 2017-11-14T00:00:00, BUY CREATE KO, price = 41.95, target pct = 10.00% 2017-11-15T00:00:00, SELL COMPLETE PEP, size = -96.70, price = 103.32 2017-11-15T00:00:00, BUY COMPLETE KO, size = 238.39, price = 41.85 2018-04-11T00:00:00, SELL CREATE KO, price = 39.57, target pct = -10.00% 2018-04-11T00:00:00, BUY CREATE PEP, price = 98.49, target pct = 10.00% 2018-04-12T00:00:00, SELL COMPLETE KO, size = -490.65, price = 39.65 2018-04-12T00:00:00, BUY COMPLETE PEP, size = 198.06, price = 98.74 2018-08-31T00:00:00, SELL CREATE PEP, price = 102.44, target pct = -10.00% 2018-08-31T00:00:00, BUY CREATE KO, price = 40.45, target pct = 10.00% 2018-09-04T00:00:00, SELL COMPLETE PEP, size = -198.78, price = 102.26 2018-09-04T00:00:00, BUY COMPLETE KO, size = 498.98, price = 40.48 2018-10-02T00:00:00, SELL CREATE KO, price = 42.57, target pct = -10.00% 2018-10-02T00:00:00, BUY CREATE PEP, price = 100.25, target pct = 10.00% 2018-10-03T00:00:00, SELL COMPLETE KO, size = -482.28, price = 42.52 2018-10-03T00:00:00, BUY COMPLETE PEP, size = 197.45, price = 100.70 2018-11-30T00:00:00, SELL CREATE PEP, price = 112.44, target pct = -10.00% 2018-11-30T00:00:00, BUY CREATE KO, price = 46.50, target pct = 10.00% 2018-12-03T00:00:00, SELL COMPLETE PEP, size = -189.20, price = 111.09 2018-12-03T00:00:00, BUY COMPLETE KO, size = 451.21, price = 46.01 ================================================== Starting Value - 100000.00 Ending Value - 100284.08 ==================================================
%matplotlib inline import matplotlib.pyplot as plt plt.rcParams["figure.figsize"] = (13, 8) cerebro.plot(iplot=False)
Image in a Jupyter notebook
[[<Figure size 1300x800 with 7 Axes>]]
# Pull the per-bar OHLCV rows recorded by DataAnalyzer and turn them into frames
bt_s1_rets, bt_s2_rets = bt_strategy.analyzers.dataanalyzer.get_analysis()
data_cols = ['open', 'high', 'low', 'close', 'volume']
bt_s1_ohlcv, bt_s2_ohlcv = (
    pd.DataFrame.from_dict(rets, orient='index', columns=data_cols)
    for rets in (bt_s1_rets, bt_s2_rets)
)

# Shapes first, then the first and last row of each frame
print(bt_s1_ohlcv.shape)
print(bt_s2_ohlcv.shape)
print(bt_s1_ohlcv.iloc[[0, -1]])
print(bt_s2_ohlcv.iloc[[0, -1]])
(502, 5) (502, 5) open high low close volume 2017-01-03 91.831129 91.962386 91.192316 91.577354 3741200.0 2018-12-31 102.775161 103.249160 101.604091 102.682220 5019100.0 open high low close volume 2017-01-03 35.801111 36.068542 35.611321 36.059914 14711000.0 2018-12-31 43.810820 43.856946 43.321878 43.681664 10576300.0
# The OHLCV seen by backtrader should match the first five downloaded columns;
# a mismatch is printed instead of raised, and stops the comparison.
try:
    for bt_ohlcv, symbol in ((bt_s1_ohlcv, SYMBOL1), (bt_s2_ohlcv, SYMBOL2)):
        np.testing.assert_allclose(
            bt_ohlcv.values,
            data.data[symbol].iloc[:, :5].values
        )
except AssertionError as e:
    print(e)
# Turn the (cash, value) tuples recorded by CashValueAnalyzer into two series
bt_cashvalue_rets = bt_strategy.analyzers.cashvalueanalyzer.get_analysis()
bt_cashvalue_df = pd.DataFrame.from_dict(
    bt_cashvalue_rets,
    orient='index',
    columns=['cash', 'value']
)
bt_cash, bt_value = bt_cashvalue_df['cash'], bt_cashvalue_df['value']

print(bt_cash.iloc[[0, -1]])
print(bt_value.iloc[[0, -1]])
2017-01-03 100000.000000 2018-12-31 100020.682139 Name: cash, dtype: float64 2017-01-03 100000.000000 2018-12-31 100284.081822 Name: value, dtype: float64
# Turn the per-symbol order tuples recorded by OrderAnalyzer into frames
bt_s1_order_rets, bt_s2_order_rets = bt_strategy.analyzers.orderanalyzer.get_analysis()
order_cols = ['order_price', 'order_size', 'order_value', 'order_comm']
bt_s1_orders, bt_s2_orders = (
    pd.DataFrame.from_dict(rets, orient='index', columns=order_cols)
    for rets in (bt_s1_order_rets, bt_s2_order_rets)
)

print(bt_s1_orders.iloc[[0, -1]])
print(bt_s2_orders.iloc[[0, -1]])
order_price order_size order_value order_comm 2017-11-15 103.316229 -96.698241 9990.497637 49.952488 2018-12-03 111.094282 -189.202087 21019.269920 105.096350 order_price order_size order_value order_comm 2017-11-15 41.851497 238.385955 -9976.809115 49.884046 2018-12-03 46.006429 451.211817 -20758.644494 103.793222
# Spread and z-score series tracked by the strategy bar by bar
bt_spread, bt_zscore = bt_strategy.spread_sr, bt_strategy.zscore_sr

print(bt_spread.iloc[[0, -1]])
print(bt_zscore.iloc[[0, -1]])
2017-10-16 -0.009388 2018-12-31 -0.021157 Name: spread, dtype: float64 2017-10-16 0.242706 2018-12-31 -0.307914 Name: zscore, dtype: float64
# Boolean signal series tracked by the strategy bar by bar
bt_short_signals, bt_long_signals = bt_strategy.short_signal_sr, bt_strategy.long_signal_sr

# Show only the bars where a signal fired
print(bt_short_signals[bt_short_signals])
print(bt_long_signals[bt_long_signals])
2017-11-14 True 2018-08-31 True 2018-11-30 True Name: short_signals, dtype: bool 2018-04-11 True 2018-10-02 True Name: long_signals, dtype: bool
# How fast is bt? cerebro = prepare_cerebro(data0, data1, use_analyzers=False, printout=False) %timeit cerebro.run(preload=False)
1.07 s ± 16.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

vectorbt

Using Portfolio.from_orders

from numba import njit


@njit
def rolling_logret_zscore_nb(a, b, period):
    """Return the log-return spread of `a` and `b` and its rolling z-score.

    The first spread value is NaN since there is no previous element;
    z-scores are NaN until a window of `period` elements has been reached.
    """
    n = a.shape[0]
    spread = np.full_like(a, np.nan, dtype=np.float64)
    zscore = np.full_like(a, np.nan, dtype=np.float64)
    spread[1:] = np.log(a[1:] / a[:-1]) - np.log(b[1:] / b[:-1])

    # Start at the first index that has `period` elements behind it
    for i in range(max(period - 1, 0), n):
        window = spread[max(0, i + 1 - period):i + 1]
        zscore[i] = (spread[i] - np.mean(window)) / np.std(window)
    return spread, zscore


@njit
def ols_spread_nb(a, b):
    """Return the last residual of regressing log(a) on log(b) with an intercept."""
    log_a = np.log(a)
    log_b = np.log(b)

    # Design matrix with the regressor and a column of ones
    design = np.vstack((log_b, np.ones(len(log_b)))).T

    # Normal equations: (X'X)^-1 X'y
    slope, intercept = np.dot(
        np.linalg.inv(np.dot(design.T, design)),
        np.dot(design.T, log_a)
    )
    residuals = log_a - (slope * log_b + intercept)
    return residuals[-1]


@njit
def rolling_ols_zscore_nb(a, b, period):
    """Return the rolling OLS spread of `a` and `b` and its rolling z-score.

    The spread at each index is fitted on the trailing window of `period`
    elements; the z-score uses the same trailing window of spread values.
    """
    spread = np.full_like(a, np.nan, dtype=np.float64)
    zscore = np.full_like(a, np.nan, dtype=np.float64)

    # Start at the first index that has `period` elements behind it
    for i in range(max(period - 1, 0), a.shape[0]):
        lo = max(0, i + 1 - period)
        hi = i + 1
        spread[i] = ols_spread_nb(a[lo:hi], b[lo:hi])
        window = spread[lo:hi]
        zscore[i] = (spread[i] - np.mean(window)) / np.std(window)
    return spread, zscore
# Pick the Numba-compiled routine matching MODE, then run it on the close prices
if MODE == 'OLS':
    zscore_func_nb = rolling_ols_zscore_nb
elif MODE == 'log_return':
    zscore_func_nb = rolling_logret_zscore_nb
else:
    raise ValueError("Unknown mode")

vbt_spread, vbt_zscore = zscore_func_nb(
    bt_s1_ohlcv['close'].values,
    bt_s2_ohlcv['close'].values,
    PERIOD
)

# Wrap the raw arrays with the index of the backtrader frames
vbt_spread = pd.Series(vbt_spread, index=bt_s1_ohlcv.index, name='spread')
vbt_zscore = pd.Series(vbt_zscore, index=bt_s1_ohlcv.index, name='zscore')
# bt only tracks bars from its first strategy call on, so compare on bt's index
pd.testing.assert_series_equal(
    bt_spread,
    vbt_spread[bt_spread.index]
)
pd.testing.assert_series_equal(
    bt_zscore,
    vbt_zscore[bt_zscore.index]
)
# Raw signals: short the spread above UPPER, long the spread below LOWER
vbt_short_signals = (vbt_zscore > UPPER).rename('short_signals')
vbt_long_signals = (vbt_zscore < LOWER).rename('long_signals')

# Clean the two signal series against each other
vbt_short_signals, vbt_long_signals = pd.Series.vbt.signals.clean(
    vbt_short_signals,
    vbt_long_signals,
    entry_first=False,
    broadcast_kwargs=dict(columns_from='keep')
)


def plot_spread_and_zscore(spread, zscore):
    """Plot the spread (top) and the z-score with signal markers and bounds (bottom)."""
    fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05)

    def in_row(row):
        # Fresh keyword arguments placing a trace into the given subplot row
        return dict(add_trace_kwargs=dict(row=row, col=1), fig=fig)

    spread.vbt.plot(**in_row(1))
    zscore.vbt.plot(**in_row(2))

    # Short signals are drawn as exit markers, long signals as entry markers
    vbt_short_signals.vbt.signals.plot_as_exit_markers(zscore, **in_row(2))
    vbt_long_signals.vbt.signals.plot_as_entry_markers(zscore, **in_row(2))

    fig.update_layout(height=500)

    # Gray band between the two bounds on the z-score axis
    fig.add_shape(
        type="rect",
        xref='paper',
        yref='y2',
        x0=0,
        y0=UPPER,
        x1=1,
        y1=LOWER,
        fillcolor="gray",
        opacity=0.2,
        layer="below",
        line_width=0,
    )
    return fig


plot_spread_and_zscore(vbt_spread, vbt_zscore).show_svg()
Image in a Jupyter notebook
# Compare only the bars where a signal fired on either side
bt_short_fired = bt_short_signals[bt_short_signals]
vbt_short_fired = vbt_short_signals[vbt_short_signals]
pd.testing.assert_series_equal(bt_short_fired, vbt_short_fired)

bt_long_fired = bt_long_signals[bt_long_signals]
vbt_long_fired = vbt_long_signals[vbt_long_signals]
pd.testing.assert_series_equal(bt_long_fired, vbt_long_fired)
# Target percentages per symbol; NaN means "no order at this bar"
symbol_cols = pd.Index([SYMBOL1, SYMBOL2], name='symbol')
vbt_order_size = pd.DataFrame(np.nan, index=bt_s1_ohlcv.index, columns=symbol_cols)

# Short the spread: sell SYMBOL1, buy SYMBOL2
vbt_order_size.loc[vbt_short_signals, SYMBOL1] = -ORDER_PCT1
vbt_order_size.loc[vbt_short_signals, SYMBOL2] = ORDER_PCT2

# Long the spread: buy SYMBOL1, sell SYMBOL2
vbt_order_size.loc[vbt_long_signals, SYMBOL1] = ORDER_PCT1
vbt_order_size.loc[vbt_long_signals, SYMBOL2] = -ORDER_PCT2

# Execute at the next bar
vbt_order_size = vbt_order_size.vbt.fshift(1)

# Show only the bars that carry an order for both symbols
print(vbt_order_size[~vbt_order_size.isnull().any(axis=1)])
symbol PEP KO 2017-11-15 -0.1 0.1 2018-04-12 0.1 -0.1 2018-09-04 -0.1 0.1 2018-10-03 0.1 -0.1 2018-12-03 -0.1 0.1
# Price frames with one column per symbol
vbt_close_price = pd.concat(
    (bt_s1_ohlcv['close'], bt_s2_ohlcv['close']),
    axis=1,
    keys=symbol_cols
)
vbt_open_price = pd.concat(
    (bt_s1_ohlcv['open'], bt_s2_ohlcv['open']),
    axis=1,
    keys=symbol_cols
)


def simulate_from_orders():
    """Simulate using `Portfolio.from_orders`."""
    # Previous close serves as the valuation price of the group
    prev_close = vbt_close_price.vbt.fshift(1)

    return vbt.Portfolio.from_orders(
        vbt_close_price,  # current close as reference price
        size=vbt_order_size,
        price=vbt_open_price,  # current open as execution price
        size_type='targetpercent',
        val_price=prev_close,
        init_cash=CASH,
        fees=COMMPERC,
        cash_sharing=True,  # share capital between assets in the same group
        group_by=True,  # all columns belong to the same group
        call_seq='auto',  # sell before buying
        freq='d'  # index frequency for annualization
    )


vbt_pf = simulate_from_orders()
print(vbt_pf.orders.records_readable)
Order Id Column Timestamp Size Price Fees Side 0 0 PEP 2017-11-15 96.698241 103.316229 49.952488 Sell 1 1 KO 2017-11-15 238.385955 41.851497 49.884046 Buy 2 2 KO 2018-04-12 490.647327 39.652521 97.277017 Sell 3 3 PEP 2018-04-12 198.056953 98.739352 97.780076 Buy 4 4 PEP 2018-09-04 198.781680 102.263047 101.640101 Sell 5 5 KO 2018-09-04 498.975917 40.477607 100.986755 Buy 6 6 KO 2018-10-03 482.284086 42.524342 102.544068 Sell 7 7 PEP 2018-10-03 197.454665 100.702245 99.420640 Buy 8 8 PEP 2018-12-03 189.202087 111.094282 105.096350 Sell 9 9 KO 2018-12-03 451.211817 46.006429 103.793222 Buy
# The cash and value series of vbt must match the ones recorded by bt
pd.testing.assert_series_equal(
    bt_cash,
    vbt_pf.cash().rename('cash')
)
pd.testing.assert_series_equal(
    bt_value,
    vbt_pf.value().rename('value')
)

print(vbt_pf.stats())
Start 2017-01-03 00:00:00 End 2018-12-31 00:00:00 Period 502 days 00:00:00 Start Value 100000.0 End Value 100284.081822 Total Return [%] 0.284082 Benchmark Return [%] 16.631282 Max Gross Exposure [%] 0.915879 Total Fees Paid 908.374763 Max Drawdown [%] 1.030291 Max Drawdown Duration 168 days 00:00:00 Total Trades 10 Total Closed Trades 8 Total Open Trades 2 Open Trade PnL 149.652774 Win Rate [%] 62.5 Best Trade [%] 9.267971 Worst Trade [%] -9.229397 Avg Winning Trade [%] 3.967201 Avg Losing Trade [%] -6.182852 Avg Winning Trade Duration 56 days 19:12:00 Avg Losing Trade Duration 80 days 16:00:00 Profit Factor 1.072464 Expectancy 16.803631 Sharpe Ratio 0.185035 Calmar Ratio 0.200403 Omega Ratio 1.036058 Sortino Ratio 0.266979 Name: group, dtype: object
# Plot portfolio
from functools import partial


def plot_orders(portfolio, column=None, add_trace_kwargs=None, fig=None):
    """Draw the orders of one column of `portfolio` into `fig`."""
    portfolio.orders.plot(column=column, add_trace_kwargs=add_trace_kwargs, fig=fig)


# One orders subplot per symbol, each bound to its own column
vbt_pf.plot(subplots=[
    (f'symbol{number}_orders', dict(
        title=f"Orders ({symbol})",
        yaxis_title="Price",
        check_is_not_grouped=False,
        plot_func=partial(plot_orders, column=symbol),
        pass_column=False
    ))
    for number, symbol in enumerate((SYMBOL1, SYMBOL2), start=1)
]).show_svg()
Image in a Jupyter notebook
# How fast is vbt? %timeit simulate_from_orders()
3.72 ms ± 15.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

While Portfolio.from_orders is a very convenient and optimized function for simulating portfolios, it requires some prior steps to produce the size array. In the example above, we needed to manually run the calculation of the spread z-score, generate the signals from the z-score, build the size array from the signals, and make sure that all arrays are perfectly aligned. All these steps must be repeated and adapted accordingly once there is more than one hyperparameter combination to test.

Nevertheless, dividing the pipeline into clearly separated backtesting steps helps us to analyze each step thoroughly and actually does wonders for strategy development and debugging.

Using Portfolio.from_order_func

Portfolio.from_order_func follows a different (self-contained) approach where as many steps as possible are defined in the simulation function itself. It sequentially processes timestamps one by one and executes orders based on the logic the user defined, rather than parsing this logic from arrays. While this makes order execution less transparent, as you can no longer analyze each piece of data on the fly (sadly, no pandas and no plotting within Numba), it has one big advantage over other vectorized methods: event-driven order processing. This gives the best flexibility (you can write any logic), security (a lower probability of exposing yourself to look-ahead bias, among other biases), and performance (you traverse the data only once). Of all the methods, this one is the most similar to backtrader.

from vectorbt.portfolio import nb as portfolio_nb
from vectorbt.base.reshape_fns import flex_select_auto_nb
from vectorbt.portfolio.enums import SizeType, Direction
from collections import namedtuple

Memory = namedtuple("Memory", ('spread', 'zscore', 'status'))
Params = namedtuple("Params", ('period', 'upper', 'lower', 'order_pct1', 'order_pct2'))


@njit
def pre_group_func_nb(c, _period, _upper, _lower, _order_pct1, _order_pct2):
    """Prepare the current group (= pair of columns).

    Returns the tuple `(memory, params, size)` that is passed on as arguments
    to `pre_segment_func_nb`.
    """
    assert c.group_len == 2

    # In contrast to bt, we don't have a class instance that we could use to store arrays,
    # so let's create a namedtuple acting as a container for our arrays
    # (you could also pass each array as a standalone object, but a single object is more convenient)
    spread = np.full(c.target_shape[0], np.nan, dtype=np.float64)
    zscore = np.full(c.target_shape[0], np.nan, dtype=np.float64)

    # Note that namedtuples aren't mutable, you can't simply assign a value,
    # thus make status variable an array of one element for an easy assignment
    status = np.full(1, 0, dtype=np.int64)
    memory = Memory(spread, zscore, status)

    # Treat each param as an array with value per group, and select the combination of params for this group
    period = flex_select_auto_nb(np.asarray(_period), 0, c.group, True)
    upper = flex_select_auto_nb(np.asarray(_upper), 0, c.group, True)
    lower = flex_select_auto_nb(np.asarray(_lower), 0, c.group, True)
    order_pct1 = flex_select_auto_nb(np.asarray(_order_pct1), 0, c.group, True)
    order_pct2 = flex_select_auto_nb(np.asarray(_order_pct2), 0, c.group, True)

    # Put all params into a container (again, this is optional)
    params = Params(period, upper, lower, order_pct1, order_pct2)

    # Create an array that will store our two target percentages used by order_func_nb
    # we do it here instead of in pre_segment_func_nb to initialize the array once, instead of in each row
    size = np.empty(c.group_len, dtype=np.float64)

    # The returned tuple is passed as arguments to the function below
    return (memory, params, size)


@njit
def pre_segment_func_nb(c, memory, params, size, mode):
    """Prepare the current segment (= row within group).

    Updates the spread and z-score of the current row in `memory`, writes the
    two target percentages (NaN = no order) into `size`, sets the call
    sequence and the valuation prices, and returns `(size,)` for
    `order_func_nb`.
    """
    # We want to perform calculations once we reach full window size
    if c.i < params.period - 1:
        size[0] = np.nan  # size of nan means no order
        size[1] = np.nan
        return (size,)

    # z-score is calculated using a window (=period) of spread values
    # This window can be specified as a slice
    window_slice = slice(max(0, c.i + 1 - params.period), c.i + 1)

    # Here comes the same as in rolling_ols_zscore_nb
    if mode == 'OLS':
        a = c.close[window_slice, c.from_col]
        b = c.close[window_slice, c.from_col + 1]
        memory.spread[c.i] = ols_spread_nb(a, b)
    elif mode == 'log_return':
        # A log return needs a previous row; without this check, row 0 would
        # index row -1, which wraps around to the last row of the data
        if c.i > 0:
            logret_a = np.log(c.close[c.i, c.from_col] / c.close[c.i - 1, c.from_col])
            logret_b = np.log(c.close[c.i, c.from_col + 1] / c.close[c.i - 1, c.from_col + 1])
            memory.spread[c.i] = logret_a - logret_b
    else:
        raise ValueError("Unknown mode")
    spread_mean = np.mean(memory.spread[window_slice])
    spread_std = np.std(memory.spread[window_slice])
    memory.zscore[c.i] = (memory.spread[c.i] - spread_mean) / spread_std

    # Signals and valuation prices below are read from the previous row.
    # At the first row (reachable only with period <= 1) there is no previous
    # row, and index -1 would silently read the last row, so place no order.
    if c.i == 0:
        size[0] = np.nan
        size[1] = np.nan
        return (size,)

    # Check if any bound is crossed
    # Since zscore is calculated using close, use zscore of the previous step
    # This way we are executing signals defined at the previous bar
    # Same logic as in PairTradingStrategy
    if memory.zscore[c.i - 1] > params.upper and memory.status[0] != 1:
        size[0] = -params.order_pct1
        size[1] = params.order_pct2

        # Here we specify the order of execution
        # call_seq_now defines order for the current group (2 elements)
        c.call_seq_now[0] = 0
        c.call_seq_now[1] = 1
        memory.status[0] = 1
    elif memory.zscore[c.i - 1] < params.lower and memory.status[0] != 2:
        size[0] = params.order_pct1
        size[1] = -params.order_pct2
        c.call_seq_now[0] = 1  # execute the second order first to release funds early
        c.call_seq_now[1] = 0
        memory.status[0] = 2
    else:
        size[0] = np.nan
        size[1] = np.nan

    # Group value is converted to shares using previous close, just like in bt
    # Note that last_val_price contains valuation price of all columns, not just the current pair
    c.last_val_price[c.from_col] = c.close[c.i - 1, c.from_col]
    c.last_val_price[c.from_col + 1] = c.close[c.i - 1, c.from_col + 1]

    return (size,)


@njit
def order_func_nb(c, size, price, commperc):
    """Place an order (= element within group and row)."""
    # Get column index within group (if group starts at column 58 and current column is 59,
    # the column within group is 1, which can be used to get size)
    group_col = c.col - c.from_col
    return portfolio_nb.order_nb(
        size=size[group_col],
        price=price[c.i, c.col],
        size_type=SizeType.TargetPercent,
        fees=commperc
    )
def simulate_from_order_func():
    """Simulate using `Portfolio.from_order_func`."""
    # Positional arguments forwarded to order_func_nb: execution prices and fees
    order_args = (vbt_open_price.values, COMMPERC)
    # Arguments forwarded to pre_group_func_nb
    group_args = (PERIOD, UPPER, LOWER, ORDER_PCT1, ORDER_PCT2)

    return vbt.Portfolio.from_order_func(
        vbt_close_price,
        order_func_nb,
        *order_args,
        pre_group_func_nb=pre_group_func_nb,
        pre_group_args=group_args,
        pre_segment_func_nb=pre_segment_func_nb,
        pre_segment_args=(MODE,),
        fill_pos_record=False,  # a bit faster
        init_cash=CASH,
        cash_sharing=True,
        group_by=True,
        freq='d'
    )


vbt_pf2 = simulate_from_order_func()
print(vbt_pf2.orders.records_readable)
Order Id Column Timestamp Size Price Fees Side 0 0 PEP 2017-11-15 96.698241 103.316229 49.952488 Sell 1 1 KO 2017-11-15 238.385955 41.851497 49.884046 Buy 2 2 KO 2018-04-12 490.647327 39.652521 97.277017 Sell 3 3 PEP 2018-04-12 198.056953 98.739352 97.780076 Buy 4 4 PEP 2018-09-04 198.781680 102.263047 101.640101 Sell 5 5 KO 2018-09-04 498.975917 40.477607 100.986755 Buy 6 6 KO 2018-10-03 482.284086 42.524342 102.544068 Sell 7 7 PEP 2018-10-03 197.454665 100.702245 99.420640 Buy 8 8 PEP 2018-12-03 189.202087 111.094282 105.096350 Sell 9 9 KO 2018-12-03 451.211817 46.006429 103.793222 Buy
# Proof that both bt and vbt produce the same result pd.testing.assert_series_equal(bt_cash, vbt_pf2.cash().rename('cash')) pd.testing.assert_series_equal(bt_value, vbt_pf2.value().rename('value'))
# How fast is vbt? %timeit simulate_from_order_func()
4.4 ms ± 17.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

Numba paradise (or hell?) - fastest

def simulate_nb_from_order_func():
    """Run the pair simulation by calling `simulate_nb` directly.

    Returns:
        A tuple ``(target_shape, group_lens, call_seq, init_cash,
        order_records, log_records)``: the inputs prepared for
        `simulate_nb` followed by the two record arrays it returns.
    """
    # Every (row, column) element is a potential order
    # (502 rows and 2 columns for this data set)
    shape = vbt_close_price.shape
    # A single group that holds exactly two columns
    lens = np.array([2])
    # Default call sequence: orders are executed from the left to the right column
    seq = portfolio_nb.build_call_seq(shape, lens)
    # Initial cash, one entry per group
    cash = np.array([CASH], dtype=np.float64)
    # All-True mask; the mask exists to allow disabling some segments
    mask = np.full(shape, True)

    orders, logs = portfolio_nb.simulate_nb(
        target_shape=shape,
        group_lens=lens,
        init_cash=cash,
        cash_sharing=True,
        call_seq=seq,
        segment_mask=mask,
        pre_group_func_nb=pre_group_func_nb,
        pre_group_args=(PERIOD, UPPER, LOWER, ORDER_PCT1, ORDER_PCT2),
        pre_segment_func_nb=pre_segment_func_nb,
        pre_segment_args=(MODE,),
        order_func_nb=order_func_nb,
        order_args=(vbt_open_price.values, COMMPERC),
        # Used for target percentage, but we override the valuation price
        close=vbt_close_price.values,
        fill_pos_record=False,
    )
    return shape, lens, seq, cash, orders, logs


(
    target_shape,
    group_lens,
    call_seq,
    init_cash,
    order_records,
    log_records,
) = simulate_nb_from_order_func()
# Wrap the raw order records in vbt.Orders to print them as a readable table
print(
    vbt.Orders(
        vbt_close_price.vbt.wrapper,
        order_records,
        vbt_close_price,
    ).records_readable
)
Order Id Column Timestamp Size Price Fees Side 0 0 PEP 2017-11-15 96.698241 103.316229 49.952488 Sell 1 1 KO 2017-11-15 238.385955 41.851497 49.884046 Buy 2 2 KO 2018-04-12 490.647327 39.652521 97.277017 Sell 3 3 PEP 2018-04-12 198.056953 98.739352 97.780076 Buy 4 4 PEP 2018-09-04 198.781680 102.263047 101.640101 Sell 5 5 KO 2018-09-04 498.975917 40.477607 100.986755 Buy 6 6 KO 2018-10-03 482.284086 42.524342 102.544068 Sell 7 7 PEP 2018-10-03 197.454665 100.702245 99.420640 Buy 8 8 PEP 2018-12-03 189.202087 111.094282 105.096350 Sell 9 9 KO 2018-12-03 451.211817 46.006429 103.793222 Buy
# Rebuild the grouped cash series from the raw order records and check that
# it matches the cash series produced by backtrader
from vectorbt.records import nb as records_nb

col_map = records_nb.col_map_nb(
    order_records['col'],
    target_shape[1],
)
cash_flow = portfolio_nb.cash_flow_nb(
    target_shape,
    order_records,
    col_map,
    False,
)
cash_flow_grouped = portfolio_nb.cash_flow_grouped_nb(cash_flow, group_lens)
cash_grouped = portfolio_nb.cash_grouped_nb(
    target_shape,
    cash_flow_grouped,
    group_lens,
    init_cash,
)
pd.testing.assert_series_equal(
    left=bt_cash,
    right=bt_cash.vbt.wrapper.wrap(cash_grouped),
)
# Rebuild the grouped portfolio value from the raw order records and check
# that it matches the value series produced by backtrader
asset_flow = portfolio_nb.asset_flow_nb(
    target_shape,
    order_records,
    col_map,
    Direction.Both,
)
assets = portfolio_nb.assets_nb(asset_flow)
asset_value = portfolio_nb.asset_value_nb(vbt_close_price.values, assets)
asset_value_grouped = portfolio_nb.asset_value_grouped_nb(
    asset_value,
    group_lens,
)
value = portfolio_nb.value_nb(cash_grouped, asset_value_grouped)
pd.testing.assert_series_equal(
    left=bt_value,
    right=bt_value.vbt.wrapper.wrap(value),
)
# For richer metrics such as stats, wrap the results in a Portfolio:
# it is built here from the same arguments that went into simulate_nb
# and from the records that came out of it
vbt_pf3 = vbt.Portfolio(
    wrapper=vbt_close_price.vbt(freq='d', group_by=True).wrapper,
    close=vbt_close_price,
    order_records=order_records,
    log_records=log_records,
    init_cash=init_cash,
    cash_sharing=True,
    call_seq=call_seq,
)

print(vbt_pf3.stats())
Start 2017-01-03 00:00:00 End 2018-12-31 00:00:00 Period 502 days 00:00:00 Start Value 100000.0 End Value 100284.081822 Total Return [%] 0.284082 Benchmark Return [%] 16.631282 Max Gross Exposure [%] 0.915879 Total Fees Paid 908.374763 Max Drawdown [%] 1.030291 Max Drawdown Duration 168 days 00:00:00 Total Trades 10 Total Closed Trades 8 Total Open Trades 2 Open Trade PnL 149.652774 Win Rate [%] 62.5 Best Trade [%] 9.267971 Worst Trade [%] -9.229397 Avg Winning Trade [%] 3.967201 Avg Losing Trade [%] -6.182852 Avg Winning Trade Duration 56 days 19:12:00 Avg Losing Trade Duration 80 days 16:00:00 Profit Factor 1.072464 Expectancy 16.803631 Sharpe Ratio 0.185035 Calmar Ratio 0.200403 Omega Ratio 1.036058 Sortino Ratio 0.266979 Name: group, dtype: object
# How fast is vbt?
%timeit simulate_nb_from_order_func()
2.3 ms ± 9.23 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

As you can see, writing Numba isn't straightforward and requires at least intermediate knowledge of NumPy. That's why Portfolio.from_orders and other class methods based on arrays are usually a good starting point.

Multiple parameters

Now, why spend all this effort porting a strategy to vectorbt? Right: for hyperparameter optimization.

The example below is for demonstration purposes only: brute-forcing many combinations on a single data sample usually leads to overfitting.

# Rolling window lengths to test: 10, 15, ..., 100
periods = np.arange(10, 105, 5)

# Upper z-score thresholds: 1.5, 1.6, ..., 2.2 (8 values).
# np.arange with a float step decides whether the stop value is included
# based on floating-point rounding, so the grid size is not obvious from the
# call. np.linspace fixes the number of points and both endpoints; rounding
# to one decimal removes float noise such as 1.7000000000000002 from the
# values.
uppers = np.round(np.linspace(1.5, 2.2, 8), 1)

# Lower thresholds mirror the upper ones
lowers = -uppers
def simulate_mult_from_order_func(periods, uppers, lowers):
    """Simulate many parameter combinations with `Portfolio.from_order_func`.

    Args:
        periods: Candidate values for the `period` parameter.
        uppers: Candidate values for the `upper` parameter.
        lowers: Candidate values for the `lower` parameter.

    Returns:
        The portfolio object produced by `vbt.Portfolio.from_order_func`,
        grouped by the ``period``, ``upper`` and ``lower`` column levels.
    """
    # Build the parameter grid: one entry per combination
    grid = vbt.utils.params.create_param_product([periods, uppers, lowers])
    level_names = ['period', 'upper', 'lower']
    combos = list(zip(*grid))
    columns = pd.MultiIndex.from_tuples(combos, names=level_names)

    # Each combination needs its own copy of the two price columns
    n_combos = len(columns)
    close_tiled = vbt_close_price.vbt.tile(n_combos, keys=columns)
    open_tiled = vbt_open_price.vbt.tile(n_combos, keys=columns)

    # One array per parameter, aligned with the combinations above
    period_arr, upper_arr, lower_arr = (np.array(values) for values in grid)
    group_args = (period_arr, upper_arr, lower_arr, ORDER_PCT1, ORDER_PCT2)

    return vbt.Portfolio.from_order_func(
        close_tiled,
        order_func_nb,
        # Extra positional arguments are handed to order_func_nb as *args
        open_tiled.values,
        COMMPERC,
        pre_group_func_nb=pre_group_func_nb,
        pre_group_args=group_args,
        pre_segment_func_nb=pre_segment_func_nb,
        pre_segment_args=(MODE,),
        fill_pos_record=False,
        init_cash=CASH,
        cash_sharing=True,
        group_by=columns.names,
        freq='d',
    )


vbt_pf_mult = simulate_mult_from_order_func(periods, uppers, lowers)
# Total return of every parameter combination, sorted in ascending order
print(
    vbt_pf_mult
    .total_return()
    .sort_values()
)

# Histogram of the total returns, rendered as SVG
(
    vbt_pf_mult
    .total_return()
    .vbt.histplot()
    .show_svg()
)
period upper lower 10 1.6 -1.5 -0.068466 1.5 -1.5 -0.067897 1.8 -1.5 -0.066885 1.5 -1.6 -0.065260 1.7 -1.5 -0.065036 ... 100 2.0 -2.2 0.003538 1.9 -2.2 0.003538 2.1 -2.2 0.003538 1.8 -2.2 0.003538 2.2 -2.2 0.003538 Name: total_return, Length: 1216, dtype: float64
Image in a Jupyter notebook
# How fast is vbt?
%timeit simulate_mult_from_order_func(periods, uppers, lowers)
2.15 s ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

Even though the strategy is profitable on paper, the majority of hyperparameter combinations yield a loss, so finding a proper "slice" of hyperparameters is just a matter of luck when relying on a single backtest. Thanks to vectorbt, we can run thousands of tests in seconds to validate our strategy - the same would take hours with conventional libraries.