Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
polakowo
GitHub Repository: polakowo/vectorbt
Path: blob/master/tests/notebooks/generic.ipynb
1073 views
Kernel: Python 3

generic

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from numba import njit
import vectorbt as vbt
# Disable caching for performance testing
# (the assignment must sit on its own line; when fused onto the comment line
# it is part of the comment and never executes).
vbt.settings.caching['enabled'] = False
# Small fixture used by every example below: five consecutive daily
# timestamps and three float32 columns, each with a NaN in a different row.
index = pd.DatetimeIndex([
    datetime(2018, 1, 1),
    datetime(2018, 1, 2),
    datetime(2018, 1, 3),
    datetime(2018, 1, 4),
    datetime(2018, 1, 5),
], freq='D')
columns = ['a', 'b', 'c']
ts = pd.DataFrame({
    'a': [1, 2, 3, 4, np.nan],
    'b': [np.nan, 4, 3, 2, 1],
    'c': [1, 2, np.nan, 2, 1],
}, index=index).astype(np.float32)

print(ts)
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 3.0 NaN 2018-01-04 4.0 2.0 2.0 2018-01-05 NaN 1.0 1.0
# Large fixture for the %timeit comparisons: 1000 x 1000 random integers in
# [0, 10) stored as floats, indexed by 1000 consecutive days from 2018-01-01.
big_ts = pd.DataFrame(np.random.randint(10, size=(1000, 1000)).astype(float))
big_ts.index = [datetime(2018, 1, 1) + timedelta(days=i) for i in range(1000)]

# Bare expression: displayed as the cell result in the notebook.
big_ts.shape
(1000, 1000)
# Column-to-group mapping for `ts`: columns 'a' and 'b' share group 0,
# column 'c' is group 1.
group_by = np.array([0, 0, 1])
# Same idea for `big_ts`: 500 groups of two adjacent columns each.
big_group_by = np.repeat(np.arange(500), 2)
# Test index frequency
# Each call must be its own statement; fused onto the comment line above,
# none of them would run.

# Frequency picked up from the DatetimeIndex of `ts`.
print(ts.vbt.wrapper.freq)
print(ts['a'].vbt.wrapper.freq)

# Frequency overridden through the accessor's `freq` argument.
print(ts.vbt(freq='2D').wrapper.freq)
print(ts['a'].vbt(freq='2D').wrapper.freq)

# Series without a datetime index: no frequency unless one is passed in.
print(pd.Series([1, 2, 3]).vbt.wrapper.freq)
print(pd.Series([1, 2, 3]).vbt(freq='3D').wrapper.freq)
print(pd.Series([1, 2, 3]).vbt(freq=np.timedelta64(4, 'D')).wrapper.freq)
1 days 00:00:00 1 days 00:00:00 2 days 00:00:00 2 days 00:00:00 None 3 days 00:00:00 4 days 00:00:00
print(ts.fillna(-1)) %timeit big_ts.fillna(-1) print(ts.vbt.fillna(-1)) %timeit big_ts.vbt.fillna(-1)
a b c 2018-01-01 1.0 -1.0 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 3.0 -1.0 2018-01-04 4.0 2.0 2.0 2018-01-05 -1.0 1.0 1.0 2.76 ms ± 83.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 2018-01-01 1.0 -1.0 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 3.0 -1.0 2018-01-04 4.0 2.0 2.0 2018-01-05 -1.0 1.0 1.0 1.12 ms ± 3.48 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
print(ts.shift(-3)) %timeit big_ts.shift(-3) print(ts.vbt.bshift(3)) %timeit big_ts.vbt.bshift(3)
a b c 2018-01-01 4.0 2.0 2.0 2018-01-02 NaN 1.0 1.0 2018-01-03 NaN NaN NaN 2018-01-04 NaN NaN NaN 2018-01-05 NaN NaN NaN 382 µs ± 1.89 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 4.0 2.0 2.0 2018-01-02 NaN 1.0 1.0 2018-01-03 NaN NaN NaN 2018-01-04 NaN NaN NaN 2018-01-05 NaN NaN NaN 1.59 ms ± 12 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
print(ts.shift(3)) %timeit big_ts.shift(3) print(ts.vbt.fshift(3)) %timeit big_ts.vbt.fshift(3)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 NaN NaN NaN 2018-01-03 NaN NaN NaN 2018-01-04 1.0 NaN 1.0 2018-01-05 2.0 4.0 2.0 405 µs ± 24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 NaN NaN NaN 2018-01-03 NaN NaN NaN 2018-01-04 1.0 NaN 1.0 2018-01-05 2.0 4.0 2.0 1.93 ms ± 187 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.diff()) %timeit big_ts.diff() print(ts.vbt.diff()) %timeit big_ts.vbt.diff()
a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.0 NaN 1.0 2018-01-03 1.0 -1.0 NaN 2018-01-04 1.0 -1.0 NaN 2018-01-05 NaN -1.0 -1.0 892 µs ± 8.89 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.0 NaN 1.0 2018-01-03 1.0 -1.0 NaN 2018-01-04 1.0 -1.0 NaN 2018-01-05 NaN -1.0 -1.0 2.45 ms ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.pct_change(fill_method=None)) %timeit big_ts.pct_change(fill_method=None) print(ts.vbt.pct_change()) %timeit big_ts.vbt.pct_change()
a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.000000 NaN 1.0 2018-01-03 0.500000 -0.250000 NaN 2018-01-04 0.333333 -0.333333 NaN 2018-01-05 NaN -0.500000 -0.5 1.47 ms ± 22.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.000000 NaN 1.0 2018-01-03 0.500000 -0.250000 NaN 2018-01-04 0.333333 -0.333333 NaN 2018-01-05 NaN -0.500000 -0.5 2.27 ms ± 32.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.ffill()) %timeit big_ts.ffill() print(ts.vbt.ffill()) %timeit big_ts.vbt.ffill()
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 3.0 2.0 2018-01-04 4.0 2.0 2.0 2018-01-05 4.0 1.0 1.0 1.68 ms ± 77.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 3.0 2.0 2018-01-04 4.0 2.0 2.0 2018-01-05 4.0 1.0 1.0 3.33 ms ± 12.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.product()) %timeit big_ts.product() print(ts.vbt.product()) %timeit big_ts.vbt.product()
a 24.0 b 24.0 c 4.0 dtype: float32 4.3 ms ± 18.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 24.0 b 24.0 c 4.0 Name: product, dtype: float64 2.55 ms ± 6.41 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.cumsum()) %timeit big_ts.cumsum() print(ts.vbt.cumsum()) %timeit big_ts.vbt.cumsum()
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 3.0 4.0 3.0 2018-01-03 6.0 7.0 NaN 2018-01-04 10.0 9.0 5.0 2018-01-05 NaN 10.0 6.0 4.6 ms ± 79.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 2018-01-01 1.0 0.0 1.0 2018-01-02 3.0 4.0 3.0 2018-01-03 6.0 7.0 3.0 2018-01-04 10.0 9.0 5.0 2018-01-05 10.0 10.0 6.0 3.53 ms ± 229 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.cumprod()) %timeit big_ts.cumprod() print(ts.vbt.cumprod()) %timeit big_ts.vbt.cumprod()
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 6.0 12.0 NaN 2018-01-04 24.0 24.0 4.0 2018-01-05 NaN 24.0 4.0 4.45 ms ± 4.72 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 2018-01-01 1.0 1.0 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 6.0 12.0 2.0 2018-01-04 24.0 24.0 4.0 2018-01-05 24.0 24.0 4.0 3.72 ms ± 9.51 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.rolling(2).min()) %timeit big_ts.rolling(2).min() print(ts.vbt.rolling_min(2)) %timeit big_ts.vbt.rolling_min(2)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.0 NaN 1.0 2018-01-03 2.0 3.0 NaN 2018-01-04 3.0 2.0 NaN 2018-01-05 NaN 1.0 1.0 57.1 ms ± 228 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.0 NaN 1.0 2018-01-03 2.0 3.0 NaN 2018-01-04 3.0 2.0 NaN 2018-01-05 NaN 1.0 1.0 5.92 ms ± 368 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.rolling(2).max()) %timeit big_ts.rolling(2).max() print(ts.vbt.rolling_max(2)) %timeit big_ts.vbt.rolling_max(2)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 2.0 NaN 2.0 2018-01-03 3.0 4.0 NaN 2018-01-04 4.0 3.0 NaN 2018-01-05 NaN 2.0 2.0 56.8 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 2.0 NaN 2.0 2018-01-03 3.0 4.0 NaN 2018-01-04 4.0 3.0 NaN 2018-01-05 NaN 2.0 2.0 5.48 ms ± 419 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.rolling(2).mean()) %timeit big_ts.rolling(2).mean() print(ts.vbt.rolling_mean(2)) %timeit big_ts.vbt.rolling_mean(2)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.5 NaN 1.5 2018-01-03 2.5 3.5 NaN 2018-01-04 3.5 2.5 NaN 2018-01-05 NaN 1.5 1.5 48.8 ms ± 112 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.5 NaN 1.5 2018-01-03 2.5 3.5 NaN 2018-01-04 3.5 2.5 NaN 2018-01-05 NaN 1.5 1.5 5.98 ms ± 328 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.rolling(2).std()) %timeit big_ts.rolling(2).std() print(ts.vbt.rolling_std(2)) %timeit big_ts.vbt.rolling_std(2)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 0.707107 NaN 0.707107 2018-01-03 0.707107 0.707107 NaN 2018-01-04 0.707107 0.707107 NaN 2018-01-05 NaN 0.707107 0.707107 64.4 ms ± 106 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 0.707107 NaN 0.707107 2018-01-03 0.707107 0.707107 NaN 2018-01-04 0.707107 0.707107 NaN 2018-01-05 NaN 0.707107 0.707107 9.16 ms ± 534 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.ewm(span=2).mean()) %timeit big_ts.ewm(span=2).mean() print(ts.vbt.ewm_mean(2)) %timeit big_ts.vbt.ewm_mean(2)
a b c 2018-01-01 1.000000 NaN 1.000000 2018-01-02 1.750000 4.000000 1.750000 2018-01-03 2.615385 3.250000 1.750000 2018-01-04 3.550000 2.384615 1.967742 2018-01-05 3.550000 1.450000 1.267857 34.1 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 1.000000 NaN 1.000000 2018-01-02 1.750000 4.000000 1.750000 2018-01-03 2.615385 3.250000 1.750000 2018-01-04 3.550000 2.384615 1.967742 2018-01-05 3.550000 1.450000 1.267857 6.79 ms ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.ewm(span=2).std()) %timeit big_ts.ewm(span=2).std() print(ts.vbt.ewm_std(2)) %timeit big_ts.vbt.ewm_std(2)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 0.707107 NaN 0.707107 2018-01-03 0.919866 0.707107 0.707107 2018-01-04 1.059753 0.919866 0.367607 2018-01-05 1.059753 1.059753 0.684914 19.8 ms ± 53.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 0.707107 NaN 0.707107 2018-01-03 0.919866 0.707107 0.707107 2018-01-04 1.059753 0.919866 0.367607 2018-01-05 1.059753 1.059753 0.684914 8.68 ms ± 15.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.expanding().min()) %timeit big_ts.expanding().min() print(ts.vbt.expanding_min()) %timeit big_ts.vbt.expanding_min()
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 1.0 4.0 1.0 2018-01-03 1.0 3.0 1.0 2018-01-04 1.0 2.0 1.0 2018-01-05 1.0 1.0 1.0 33.9 ms ± 100 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 1.0 4.0 1.0 2018-01-03 1.0 3.0 1.0 2018-01-04 1.0 2.0 1.0 2018-01-05 1.0 1.0 1.0 4.5 ms ± 241 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.expanding().max()) %timeit big_ts.expanding().max() print(ts.vbt.expanding_max()) %timeit big_ts.vbt.expanding_max()
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 4.0 2.0 2018-01-04 4.0 4.0 2.0 2018-01-05 4.0 4.0 2.0 33.7 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 2.0 4.0 2.0 2018-01-03 3.0 4.0 2.0 2018-01-04 4.0 4.0 2.0 2018-01-05 4.0 4.0 2.0 3.42 ms ± 618 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts.expanding().mean()) %timeit big_ts.expanding().mean() print(ts.vbt.expanding_mean()) %timeit big_ts.vbt.expanding_mean()
a b c 2018-01-01 1.0 NaN 1.000000 2018-01-02 1.5 4.0 1.500000 2018-01-03 2.0 3.5 1.500000 2018-01-04 2.5 3.0 1.666667 2018-01-05 2.5 2.5 1.500000 21.9 ms ± 68.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 1.0 NaN 1.000000 2018-01-02 1.5 4.0 1.500000 2018-01-03 2.0 3.5 1.500000 2018-01-04 2.5 3.0 1.666667 2018-01-05 2.5 2.5 1.500000 5.28 ms ± 354 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.expanding().std()) %timeit big_ts.expanding().std() print(ts.vbt.expanding_std()) %timeit big_ts.vbt.expanding_std()
a b c 2018-01-01 NaN NaN NaN 2018-01-02 0.707107 NaN 0.707107 2018-01-03 1.000000 0.707107 0.707107 2018-01-04 1.290994 1.000000 0.577350 2018-01-05 1.290994 1.290994 0.577350 34.7 ms ± 111 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 0.707107 NaN 0.707107 2018-01-03 1.000000 0.707107 0.707107 2018-01-04 1.290994 1.000000 0.577350 2018-01-05 1.290994 1.290994 0.577350 8.24 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Squaring kernels compiled with numba.
# `pd_pow_nb` takes only the raw array (passed to DataFrame.apply with raw=True).
pd_pow_nb = njit(lambda x: np.power(x, 2))
# `pow_nb` takes an extra leading `col_i` argument (passed to
# vbt.apply_along_axis); the argument is not used in the body.
pow_nb = njit(lambda col_i, x: np.power(x, 2))
print(ts.apply(pd_pow_nb, axis=0, raw=True)) %timeit big_ts.apply(pd_pow_nb, axis=0, raw=True) print(ts.vbt.apply_along_axis(pow_nb)) %timeit big_ts.vbt.apply_along_axis(pow_nb)
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 4.0 16.0 4.0 2018-01-03 9.0 9.0 NaN 2018-01-04 16.0 4.0 4.0 2018-01-05 NaN 1.0 1.0 3.38 ms ± 98.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 4.0 16.0 4.0 2018-01-03 9.0 9.0 NaN 2018-01-04 16.0 4.0 4.0 2018-01-05 NaN 1.0 1.0 2.03 ms ± 327 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# NaN-aware mean kernels compiled with numba; they differ only in the
# leading arguments they accept (none of which are used in the bodies).
# Plain one-argument form for pandas rolling/expanding/groupby/resample apply.
pd_nanmean_nb = njit(lambda x: np.nanmean(x))
# Three-argument form (i, col, x) for the vbt *_apply accessors.
nanmean_nb = njit(lambda i, col, x: np.nanmean(x))
# Two-argument form (i, x) for the vbt *_apply accessors with on_matrix=True.
nanmean_matrix_nb = njit(lambda i, x: np.nanmean(x))
print(ts.rolling(2, min_periods=1).apply(pd_nanmean_nb, raw=True)) %timeit big_ts.rolling(2, min_periods=1).apply(pd_nanmean_nb, raw=True) print(ts.vbt.rolling_apply(2, nanmean_nb)) %timeit big_ts.vbt.rolling_apply(2, nanmean_nb) print(ts.vbt.rolling_apply(3, nanmean_matrix_nb, on_matrix=True)) %timeit big_ts.vbt.rolling_apply(3, nanmean_matrix_nb, on_matrix=True)
a b c 2018-01-01 1.0 NaN 1.0 2018-01-02 1.5 4.0 1.5 2018-01-03 2.5 3.5 2.0 2018-01-04 3.5 2.5 2.0 2018-01-05 4.0 1.5 1.5 483 ms ± 3.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 1.5 NaN 1.5 2018-01-03 2.5 3.5 NaN 2018-01-04 3.5 2.5 NaN 2018-01-05 NaN 1.5 1.5 6.04 ms ± 323 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 NaN NaN NaN 2018-01-03 NaN NaN NaN 2018-01-04 2.75 2.75 2.75 2018-01-05 NaN NaN NaN 7.24 ms ± 82.7 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts.expanding(min_periods=1).apply(pd_nanmean_nb, raw=True)) %timeit big_ts.expanding(min_periods=1).apply(pd_nanmean_nb, raw=True) print(ts.vbt.expanding_apply(nanmean_nb)) %timeit big_ts.vbt.expanding_apply(nanmean_nb) print(ts.vbt.expanding_apply(nanmean_matrix_nb, on_matrix=True)) %timeit big_ts.vbt.expanding_apply(nanmean_matrix_nb, on_matrix=True)
a b c 2018-01-01 1.0 NaN 1.000000 2018-01-02 1.5 4.0 1.500000 2018-01-03 2.0 3.5 1.500000 2018-01-04 2.5 3.0 1.666667 2018-01-05 2.5 2.5 1.500000 1.55 s ± 3.23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 1.0 NaN 1.000000 2018-01-02 1.5 4.0 1.500000 2018-01-03 2.0 3.5 1.500000 2018-01-04 2.5 3.0 1.666667 2018-01-05 2.5 2.5 1.500000 1.09 s ± 8.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 2.000000 2.000000 2.000000 2018-01-03 2.285714 2.285714 2.285714 2018-01-04 2.400000 2.400000 2.400000 2018-01-05 2.166667 2.166667 2.166667 1.12 s ± 442 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts['a'].groupby(np.asarray([1, 1, 2, 2, 3])).apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.iloc[:, 0].groupby(np.random.randint(10, size=1000)).apply(lambda x: pd_nanmean_nb(x.values)) print(ts['a'].vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), nanmean_nb)) %timeit big_ts.iloc[:, 0].vbt.groupby_apply(np.random.randint(10, size=1000), nanmean_nb) print(ts.groupby(np.asarray([1, 1, 2, 2, 3])).agg({ 'a': lambda x: pd_nanmean_nb(x.values), 'b': lambda x: pd_nanmean_nb(x.values), 'c': lambda x: pd_nanmean_nb(x.values)})) # any clean way to do column-wise grouping in pandas? print(ts.vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), nanmean_nb)) %timeit big_ts.vbt.groupby_apply(np.random.randint(10, size=1000), nanmean_nb) print(ts.vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), nanmean_matrix_nb, on_matrix=True)) %timeit big_ts.vbt.groupby_apply(np.random.randint(10, size=1000), nanmean_matrix_nb, on_matrix=True)
1 1.5 2 3.5 3 NaN Name: a, dtype: float64 471 µs ± 456 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each) 1 1.5 2 3.5 3 NaN Name: a, dtype: float64 1.46 ms ± 3.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 1 1.5 4.0 1.5 2 3.5 2.5 2.0 3 NaN 1.0 1.0 a b c 1 1.5 4.0 1.5 2 3.5 2.5 2.0 3 NaN 1.0 1.0 4.85 ms ± 18.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 1 2.0 2.0 2.0 2 2.8 2.8 2.8 3 1.0 1.0 1.0 3.15 ms ± 223 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
print(ts['a'].resample('1h').apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.iloc[:, 0].resample('1h').apply(lambda x: pd_nanmean_nb(x.values)) print(ts['a'].vbt.resample_apply('1h', nanmean_nb)) %timeit big_ts.iloc[:, 0].vbt.resample_apply('1h', nanmean_nb) print(ts.resample('1h').apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.iloc[:, :100].resample('1h').apply(lambda x: pd_nanmean_nb(x.values)) print(ts.vbt.resample_apply('1h', nanmean_nb)) %timeit big_ts.vbt.resample_apply('1h', nanmean_nb) print(ts.vbt.resample_apply('1h', nanmean_matrix_nb, on_matrix=True)) %timeit big_ts.vbt.resample_apply('1h', nanmean_matrix_nb, on_matrix=True)
2018-01-01 00:00:00 1.0 2018-01-01 01:00:00 NaN 2018-01-01 02:00:00 NaN 2018-01-01 03:00:00 NaN 2018-01-01 04:00:00 NaN ... 2018-01-04 20:00:00 NaN 2018-01-04 21:00:00 NaN 2018-01-04 22:00:00 NaN 2018-01-04 23:00:00 NaN 2018-01-05 00:00:00 NaN Freq: H, Name: a, Length: 97, dtype: float64 96.6 ms ± 320 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) 2018-01-01 00:00:00 1.0 2018-01-01 01:00:00 NaN 2018-01-01 02:00:00 NaN 2018-01-01 03:00:00 NaN 2018-01-01 04:00:00 NaN ... 2018-01-04 20:00:00 NaN 2018-01-04 21:00:00 NaN 2018-01-04 22:00:00 NaN 2018-01-04 23:00:00 NaN 2018-01-05 00:00:00 NaN Freq: H, Name: a, Length: 97, dtype: float64 21.3 ms ± 456 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 00:00:00 1.0 NaN 1.0 2018-01-01 01:00:00 NaN NaN NaN 2018-01-01 02:00:00 NaN NaN NaN 2018-01-01 03:00:00 NaN NaN NaN 2018-01-01 04:00:00 NaN NaN NaN ... ... ... ... 2018-01-04 20:00:00 NaN NaN NaN 2018-01-04 21:00:00 NaN NaN NaN 2018-01-04 22:00:00 NaN NaN NaN 2018-01-04 23:00:00 NaN NaN NaN 2018-01-05 00:00:00 NaN 1.0 1.0 [97 rows x 3 columns] 9.7 s ± 60.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 00:00:00 1.0 NaN 1.0 2018-01-01 01:00:00 NaN NaN NaN 2018-01-01 02:00:00 NaN NaN NaN 2018-01-01 03:00:00 NaN NaN NaN 2018-01-01 04:00:00 NaN NaN NaN ... ... ... ... 2018-01-04 20:00:00 NaN NaN NaN 2018-01-04 21:00:00 NaN NaN NaN 2018-01-04 22:00:00 NaN NaN NaN 2018-01-04 23:00:00 NaN NaN NaN 2018-01-05 00:00:00 NaN 1.0 1.0 [97 rows x 3 columns] 238 ms ± 10.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 00:00:00 1.0 1.0 1.0 2018-01-01 01:00:00 NaN NaN NaN 2018-01-01 02:00:00 NaN NaN NaN 2018-01-01 03:00:00 NaN NaN NaN 2018-01-01 04:00:00 NaN NaN NaN ... ... ... ... 2018-01-04 20:00:00 NaN NaN NaN 2018-01-04 21:00:00 NaN NaN NaN 2018-01-04 22:00:00 NaN NaN NaN 2018-01-04 23:00:00 NaN NaN NaN 2018-01-05 00:00:00 1.0 1.0 1.0 [97 rows x 3 columns] 121 ms ± 1.5 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)
print(ts['a'].resample('3d').apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.iloc[:, 0].resample('3d').apply(lambda x: pd_nanmean_nb(x.values)) print(ts['a'].vbt.resample_apply('3d', nanmean_nb)) %timeit big_ts.iloc[:, 0].vbt.resample_apply('3d', nanmean_nb) print(ts.resample('3d').apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.resample('3d').apply(lambda x: pd_nanmean_nb(x.values)) print(ts.vbt.resample_apply('3d', nanmean_nb)) %timeit big_ts.vbt.resample_apply('3d', nanmean_nb) print(ts.vbt.resample_apply('3d', nanmean_matrix_nb, on_matrix=True)) %timeit big_ts.vbt.resample_apply('3d', nanmean_matrix_nb, on_matrix=True)
2018-01-01 2.0 2018-01-04 4.0 Freq: 3D, Name: a, dtype: float64 1.45 ms ± 14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 2018-01-01 2.0 2018-01-04 4.0 Freq: 3D, Name: a, dtype: float64 3.81 ms ± 19.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 2018-01-01 2.0 3.5 1.5 2018-01-04 4.0 1.5 1.5 1.29 s ± 6.93 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-01 2.0 3.5 1.5 2018-01-04 4.0 1.5 1.5 42.9 ms ± 256 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 2.285714 2.285714 2.285714 2018-01-04 2.000000 2.000000 2.000000 6.18 ms ± 209 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(ts['a'].resample('1w').apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.iloc[:, 0].resample('1w').apply(lambda x: pd_nanmean_nb(x.values)) print(ts['a'].vbt.resample_apply('1w', nanmean_nb)) %timeit big_ts.iloc[:, 0].vbt.resample_apply('1w', nanmean_nb) print(ts.resample('1w').apply(lambda x: pd_nanmean_nb(x.values))) %timeit big_ts.resample('1w').apply(lambda x: pd_nanmean_nb(x.values)) print(ts.vbt.resample_apply('1w', nanmean_nb)) %timeit big_ts.vbt.resample_apply('1w', nanmean_nb) print(ts.vbt.resample_apply('1w', nanmean_matrix_nb, on_matrix=True)) %timeit big_ts.vbt.resample_apply('1w', nanmean_matrix_nb, on_matrix=True)
2018-01-07 2.5 Freq: W-SUN, Name: a, dtype: float64 2 ms ± 13.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 2018-01-07 2.5 Freq: W-SUN, Name: a, dtype: float64 4.08 ms ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c 2018-01-07 2.5 2.5 1.5 730 ms ± 16.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c 2018-01-07 2.5 2.5 1.5 21.5 ms ± 347 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-07 2.166667 2.166667 2.166667 5.85 ms ± 47.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
mult_nb = njit(lambda i, col, x: x * 2) print(ts.applymap(lambda x: x * 2)) %timeit big_ts.applymap(lambda x: x * 2) print(ts['a'].vbt.applymap(mult_nb)) %timeit big_ts.iloc[:, 0].vbt.applymap(mult_nb) print(ts.vbt.applymap(mult_nb)) %timeit big_ts.vbt.applymap(mult_nb)
a b c 2018-01-01 2.0 NaN 2.0 2018-01-02 4.0 8.0 4.0 2018-01-03 6.0 6.0 NaN 2018-01-04 8.0 4.0 4.0 2018-01-05 NaN 2.0 2.0 241 ms ± 1.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 2018-01-01 2.0 2018-01-02 4.0 2018-01-03 6.0 2018-01-04 8.0 2018-01-05 NaN Freq: D, Name: a, dtype: float64 1.1 ms ± 3.65 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 2.0 NaN 2.0 2018-01-02 4.0 8.0 4.0 2018-01-03 6.0 6.0 NaN 2018-01-04 8.0 4.0 4.0 2018-01-05 NaN 2.0 2.0 4.28 ms ± 42.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
greater_nb = njit(lambda i, col, x: x > 2) print(ts.applymap(lambda x: x if x > 2 else np.nan)) %timeit big_ts.applymap(lambda x: x if x > 2 else np.nan) print(ts['a'].vbt.filter(greater_nb)) %timeit big_ts.iloc[:, 0].vbt.filter(greater_nb) print(ts.vbt.filter(greater_nb)) %timeit big_ts.vbt.filter(greater_nb)
a b c 2018-01-01 NaN NaN NaN 2018-01-02 NaN 4.0 NaN 2018-01-03 3.0 3.0 NaN 2018-01-04 4.0 NaN NaN 2018-01-05 NaN NaN NaN 258 ms ± 3.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 2018-01-01 NaN 2018-01-02 NaN 2018-01-03 3.0 2018-01-04 4.0 2018-01-05 NaN Freq: D, Name: a, dtype: float64 1.11 ms ± 1.61 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a b c 2018-01-01 NaN NaN NaN 2018-01-02 NaN 4.0 NaN 2018-01-03 3.0 3.0 NaN 2018-01-04 4.0 NaN NaN 2018-01-05 NaN NaN NaN 6.85 ms ± 78.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
every_2th_nb = njit(lambda col, a: a[::2]) sum_nb = njit(lambda col, a: np.nansum(a)) print(ts['a'].vbt.apply_and_reduce(every_2th_nb, sum_nb)) %timeit big_ts.iloc[:, 0].vbt.apply_and_reduce(every_2th_nb, sum_nb) print(ts.vbt.apply_and_reduce(every_2th_nb, sum_nb)) %timeit big_ts.vbt.apply_and_reduce(every_2th_nb, sum_nb) print(ts.vbt.apply_and_reduce(every_2th_nb, sum_nb, wrap_kwargs=dict(to_timedelta=True))) %timeit big_ts.vbt.apply_and_reduce(every_2th_nb, sum_nb, wrap_kwargs=dict(to_timedelta=True))
4.0 1.12 ms ± 12.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a 4.0 b 4.0 c 2.0 Name: apply_and_reduce, dtype: float32 1.14 ms ± 18.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a 4 days b 4 days c 2 days Name: apply_and_reduce, dtype: timedelta64[ns] 1.22 ms ± 746 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
print(ts.apply(np.nansum, axis=0)) %timeit big_ts.apply(np.nansum, axis=0) print(ts['a'].vbt.reduce(sum_nb)) %timeit big_ts.iloc[:, 0].vbt.reduce(sum_nb) print(ts.vbt.reduce(sum_nb)) %timeit big_ts.vbt.reduce(sum_nb) print(ts.vbt.reduce(sum_nb, wrap_kwargs=dict(to_timedelta=True))) %timeit big_ts.vbt.reduce(sum_nb, wrap_kwargs=dict(to_timedelta=True))
a 10.0 b 10.0 c 6.0 dtype: float32 29.6 ms ± 200 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) 10.0 1.09 ms ± 1.12 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a 10.0 b 10.0 c 6.0 Name: reduce, dtype: float32 2.25 ms ± 1.61 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 10 days b 10 days c 6 days Name: reduce, dtype: timedelta64[ns] 2.35 ms ± 1.55 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
argmax_nb = njit(lambda col, a: np.argmax(a)) print(ts['a'].vbt.reduce(argmax_nb, returns_idx=True)) %timeit big_ts.iloc[:, 0].vbt.reduce(argmax_nb, returns_idx=True) print(ts.vbt.reduce(argmax_nb, returns_idx=True)) %timeit big_ts.vbt.reduce(argmax_nb, returns_idx=True)
2018-01-05 00:00:00 1.12 ms ± 12.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a 2018-01-05 b 2018-01-01 c 2018-01-03 Name: reduce, dtype: datetime64[ns] 7.52 ms ± 69.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
@njit
def min_and_max_nb(col, a):
    # Reducer that returns two values for a column:
    # [NaN-ignoring minimum, NaN-ignoring maximum]. `col` is not used.
    result = np.empty(2)
    result[0] = np.nanmin(a)
    result[1] = np.nanmax(a)
    return result

# pandas baseline producing the same min/max pair per column.
print(ts.apply(lambda x: np.asarray([np.min(x), np.max(x)]), axis=0))
%timeit big_ts.apply(lambda x: np.asarray([np.min(x), np.max(x)]), axis=0)
# Single column: `name_or_index` labels the two returned values.
print(ts['a'].vbt.reduce(
    min_and_max_nb, returns_array=True, wrap_kwargs=dict(name_or_index=['min', 'max'])))
%timeit big_ts.iloc[:, 0].vbt.reduce(\
    min_and_max_nb, returns_array=True, wrap_kwargs=dict(name_or_index=['min', 'max']))
# Whole frame: prints a frame with rows 'min' and 'max'.
print(ts.vbt.reduce(
    min_and_max_nb, returns_array=True, wrap_kwargs=dict(name_or_index=['min', 'max'])))
%timeit big_ts.vbt.reduce(\
    min_and_max_nb, returns_array=True, wrap_kwargs=dict(name_or_index=['min', 'max']))
# Same reduction, with the result wrapped as timedeltas.
print(ts.vbt.reduce(
    min_and_max_nb, returns_array=True,
    wrap_kwargs=dict(name_or_index=['min', 'max'], to_timedelta=True)))
%timeit big_ts.vbt.reduce(\
    min_and_max_nb, returns_array=True, wrap_kwargs=dict(name_or_index=['min', 'max'], to_timedelta=True))
a b c 0 1.0 1.0 1.0 1 4.0 4.0 2.0 65 ms ± 443 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) min 1.0 max 4.0 Name: a, dtype: float64 1.21 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c min 1.0 1.0 1.0 max 4.0 4.0 2.0 4.11 ms ± 69.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a b c min 1 days 1 days 1 days max 4 days 4 days 2 days 4.23 ms ± 83.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Grouped reductions: columns are combined per `group_by` label before reducing.
# `sum_nb` and `min_and_max_nb` come from earlier cells.
# NOTE(review): the `flatten=True` variants with order='C' and order='F' print
# the same values as the default call here; presumably the flags only change
# how a group's columns are laid out before the reducer sees them - confirm.
print(ts.vbt.reduce(sum_nb, group_by=group_by))
%timeit big_ts.vbt.reduce(sum_nb, group_by=big_group_by)
print(ts.vbt.reduce(sum_nb, group_by=group_by, flatten=True, order='C'))
%timeit big_ts.vbt.reduce(sum_nb, group_by=big_group_by, flatten=True, order='C')
print(ts.vbt.reduce(sum_nb, group_by=group_by, flatten=True, order='F'))
%timeit big_ts.vbt.reduce(sum_nb, group_by=big_group_by, flatten=True, order='F')
# Array-returning reducer per group: prints rows 'min' and 'max', one column per group.
print(ts.vbt.reduce(
    min_and_max_nb, group_by=group_by, returns_array=True,
    wrap_kwargs=dict(name_or_index=['min', 'max'])))
%timeit big_ts.vbt.reduce(\
    min_and_max_nb, group_by=big_group_by, returns_array=True, \
    wrap_kwargs=dict(name_or_index=['min', 'max']))
print(ts.vbt.reduce(
    min_and_max_nb, group_by=group_by, returns_array=True, flatten=True, order='C',
    wrap_kwargs=dict(name_or_index=['min', 'max'])))
%timeit big_ts.vbt.reduce(\
    min_and_max_nb, group_by=big_group_by, returns_array=True, flatten=True, order='C', \
    wrap_kwargs=dict(name_or_index=['min', 'max']))
print(ts.vbt.reduce(
    min_and_max_nb, group_by=group_by, returns_array=True, flatten=True, order='F',
    wrap_kwargs=dict(name_or_index=['min', 'max'])))
%timeit big_ts.vbt.reduce(\
    min_and_max_nb, group_by=big_group_by, returns_array=True, flatten=True, order='F', \
    wrap_kwargs=dict(name_or_index=['min', 'max']))
0 20.0 1 6.0 Name: reduce, dtype: float32 3.41 ms ± 98.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 20.0 1 6.0 Name: reduce, dtype: float32 4.57 ms ± 180 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) 0 20.0 1 6.0 Name: reduce, dtype: float32 4.44 ms ± 108 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 1 min 1.0 1.0 max 4.0 2.0 5.65 ms ± 387 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) 0 1 min 1.0 1.0 max 4.0 2.0 5.97 ms ± 241 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) 0 1 min 1.0 1.0 max 4.0 2.0 5.58 ms ± 68.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Collapse each group's columns into one column, row by row, using the reducer.
# NOTE(review): `nanmean_nb` is not defined in this cell; presumably it is
# created in an earlier cell - verify before running this cell on its own.
print(ts.vbt.squeeze_grouped(nanmean_nb, group_by=group_by))
%timeit big_ts.vbt.squeeze_grouped(nanmean_nb, group_by=big_group_by)
0 1 2018-01-01 1.0 1.0 2018-01-02 3.0 2.0 2018-01-03 3.0 NaN 2018-01-04 3.0 2.0 2018-01-05 1.0 1.0 3.48 ms ± 8.59 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Stack each group's columns into a single, longer column.
# order='F': each member column is appended as a whole block (the printed
# index runs through all dates, then repeats).
print(ts.vbt.flatten_grouped(group_by=group_by, order='F'))
%timeit big_ts.vbt.flatten_grouped(group_by=big_group_by, order='F')
# order='C': member columns are interleaved row by row (each date appears
# consecutively, once per member column).
print(ts.vbt.flatten_grouped(group_by=group_by, order='C'))
%timeit big_ts.vbt.flatten_grouped(group_by=big_group_by, order='C')
0 1 2018-01-01 1.0 1.0 2018-01-02 2.0 2.0 2018-01-03 3.0 NaN 2018-01-04 4.0 2.0 2018-01-05 NaN 1.0 2018-01-01 NaN NaN 2018-01-02 4.0 NaN 2018-01-03 3.0 NaN 2018-01-04 2.0 NaN 2018-01-05 1.0 NaN 3.54 ms ± 525 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) 0 1 2018-01-01 1.0 1.0 2018-01-01 NaN NaN 2018-01-02 2.0 2.0 2018-01-02 4.0 NaN 2018-01-03 3.0 NaN 2018-01-03 3.0 NaN 2018-01-04 4.0 2.0 2018-01-04 2.0 NaN 2018-01-05 NaN 1.0 2018-01-05 1.0 NaN 4.2 ms ± 60.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Column minimum: pandas baseline first, then the vbt accessor.
print(ts.min())
%timeit big_ts.min()
print(ts.vbt.min())
%timeit big_ts.vbt.min()
# Result wrapped as timedeltas.
print(ts.vbt.min(wrap_kwargs=dict(to_timedelta=True)))
%timeit big_ts.vbt.min(wrap_kwargs=dict(to_timedelta=True))
# One value per group instead of per column.
print(ts.vbt.min(group_by=group_by))
%timeit big_ts.vbt.min(group_by=big_group_by)
a 1.0 b 1.0 c 1.0 dtype: float32 4.1 ms ± 4.69 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 1.0 b 1.0 c 1.0 Name: min, dtype: float32 2.83 ms ± 2.08 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 1 days b 1 days c 1 days Name: min, dtype: timedelta64[ns] 3.04 ms ± 93.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 1.0 1 1.0 Name: min, dtype: float32 4.71 ms ± 305 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Column maximum: pandas baseline first, then the vbt accessor.
print(ts.max())
%timeit big_ts.max()
print(ts.vbt.max())
%timeit big_ts.vbt.max()
# Result wrapped as timedeltas.
print(ts.vbt.max(wrap_kwargs=dict(to_timedelta=True)))
%timeit big_ts.vbt.max(wrap_kwargs=dict(to_timedelta=True))
# One value per group instead of per column.
print(ts.vbt.max(group_by=group_by))
%timeit big_ts.vbt.max(group_by=big_group_by)
a 4.0 b 4.0 c 2.0 dtype: float32 4.8 ms ± 89 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 4.0 b 4.0 c 2.0 Name: max, dtype: float32 3.39 ms ± 15.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 4 days b 4 days c 2 days Name: max, dtype: timedelta64[ns] 3.63 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 4.0 1 2.0 Name: max, dtype: float32 4.28 ms ± 468 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Column mean: pandas baseline first, then the vbt accessor.
print(ts.mean())
%timeit big_ts.mean()
print(ts.vbt.mean())
%timeit big_ts.vbt.mean()
# Result wrapped as timedeltas.
print(ts.vbt.mean(wrap_kwargs=dict(to_timedelta=True)))
%timeit big_ts.vbt.mean(wrap_kwargs=dict(to_timedelta=True))
# One value per group; the recorded output is float64 here, unlike the
# float32 per-column result.
print(ts.vbt.mean(group_by=group_by))
%timeit big_ts.vbt.mean(group_by=big_group_by)
a 2.5 b 2.5 c 1.5 dtype: float32 2.53 ms ± 6.77 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 2.5 b 2.5 c 1.5 Name: mean, dtype: float32 1.23 ms ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a 2 days 12:00:00 b 2 days 12:00:00 c 1 days 12:00:00 Name: mean, dtype: timedelta64[ns] 1.38 ms ± 21.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 0 2.5 1 1.5 Name: mean, dtype: float64 4.31 ms ± 140 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Column standard deviation: pandas baseline first, then the vbt accessor.
print(ts.std())
%timeit big_ts.std()
print(ts.vbt.std())
%timeit big_ts.vbt.std()
# Result wrapped as timedeltas.
print(ts.vbt.std(wrap_kwargs=dict(to_timedelta=True)))
%timeit big_ts.vbt.std(wrap_kwargs=dict(to_timedelta=True))
# One value per group instead of per column.
print(ts.vbt.std(group_by=group_by))
%timeit big_ts.vbt.std(group_by=big_group_by)
a 1.290994 b 1.290994 c 0.577350 dtype: float32 3.44 ms ± 3.03 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 1.290994 b 1.290994 c 0.577350 Name: std, dtype: float32 2.16 ms ± 1.77 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 1 days 06:59:01.916656494 b 1 days 06:59:01.916656494 c 0 days 13:51:23.062362670 Name: std, dtype: timedelta64[ns] 2.68 ms ± 302 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 1.195229 1 0.577350 Name: std, dtype: float64 7.32 ms ± 99.8 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Number of non-NaN values per column: pandas baseline (negated null mask,
# summed) first, then the vbt accessor.
print((~ts.isnull()).sum())
%timeit (~big_ts.isnull()).sum()
print(ts.vbt.count())
%timeit big_ts.vbt.count()
# Result wrapped as timedeltas.
print(ts.vbt.count(wrap_kwargs=dict(to_timedelta=True)))
%timeit big_ts.vbt.count(wrap_kwargs=dict(to_timedelta=True))
# Per group, counts from member columns are combined (8 and 4 in the output).
print(ts.vbt.count(group_by=group_by))
%timeit big_ts.vbt.count(group_by=big_group_by)
a 4 b 4 c 4 dtype: int64 2.43 ms ± 3.33 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 4 b 4 c 4 Name: count, dtype: int64 1.03 ms ± 24.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) a 4 days b 4 days c 4 days Name: count, dtype: timedelta64[ns] 1.12 ms ± 2.34 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 0 8 1 4 Name: count, dtype: int64 2.71 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Index label of each column's minimum: pandas baseline, then the vbt accessor.
print(ts.idxmin())
%timeit big_ts.idxmin()
print(ts.vbt.idxmin())
%timeit big_ts.vbt.idxmin()
# One label per group instead of per column.
print(ts.vbt.idxmin(group_by=group_by))
%timeit big_ts.vbt.idxmin(group_by=big_group_by)
a 2018-01-01 b 2018-01-05 c 2018-01-01 dtype: datetime64[ns] 10.1 ms ± 116 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 2018-01-01 b 2018-01-05 c 2018-01-01 Name: idxmin, dtype: datetime64[ns] 5.18 ms ± 58.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 2018-01-01 1 2018-01-01 Name: idxmin, dtype: datetime64[ns] 7.5 ms ± 208 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Index label of each column's maximum: pandas baseline, then the vbt accessor.
print(ts.idxmax())
%timeit big_ts.idxmax()
print(ts.vbt.idxmax())
%timeit big_ts.vbt.idxmax()
# One label per group instead of per column.
print(ts.vbt.idxmax(group_by=group_by))
%timeit big_ts.vbt.idxmax(group_by=big_group_by)
a 2018-01-04 b 2018-01-02 c 2018-01-02 dtype: datetime64[ns] 10.4 ms ± 187 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) a 2018-01-04 b 2018-01-02 c 2018-01-02 Name: idxmax, dtype: datetime64[ns] 4.99 ms ± 318 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 0 2018-01-02 1 2018-01-02 Name: idxmax, dtype: datetime64[ns] 8.04 ms ± 218 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Summary statistics with deciles 0%..90%: pandas baseline, then the vbt accessor.
print(ts.describe(percentiles=np.arange(0, 1, 0.1)))
%timeit big_ts.describe(percentiles=np.arange(0, 1, 0.1))
print(ts.vbt.describe(percentiles=np.arange(0, 1, 0.1)))
%timeit big_ts.vbt.describe(percentiles=np.arange(0, 1, 0.1))
# Statistics computed per group instead of per column.
print(ts.vbt.describe(percentiles=np.arange(0, 1, 0.1), group_by=group_by))
%timeit big_ts.vbt.describe(percentiles=np.arange(0, 1, 0.1), group_by=big_group_by)
a b c count 4.000000 4.000000 4.00000 mean 2.500000 2.500000 1.50000 std 1.290994 1.290994 0.57735 min 1.000000 1.000000 1.00000 0% 1.000000 1.000000 1.00000 10% 1.300000 1.300000 1.00000 20% 1.600000 1.600000 1.00000 30% 1.900000 1.900000 1.00000 40% 2.200000 2.200000 1.20000 50% 2.500000 2.500000 1.50000 60% 2.800000 2.800000 1.80000 70% 3.100000 3.100000 2.00000 80% 3.400000 3.400000 2.00000 90% 3.700000 3.700000 2.00000 max 4.000000 4.000000 2.00000 759 ms ± 3.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) a b c count 4.000000 4.000000 4.00000 mean 2.500000 2.500000 1.50000 std 1.290994 1.290994 0.57735 min 1.000000 1.000000 1.00000 0% 1.000000 1.000000 1.00000 10% 1.300000 1.300000 1.00000 20% 1.600000 1.600000 1.00000 30% 1.900000 1.900000 1.00000 40% 2.200000 2.200000 1.20000 50% 2.500000 2.500000 1.50000 60% 2.800000 2.800000 1.80000 70% 3.100000 3.100000 2.00000 80% 3.400000 3.400000 2.00000 90% 3.700000 3.700000 2.00000 max 4.000000 4.000000 2.00000 72 ms ± 11.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 0 1 count 8.000000 4.00000 mean 2.500000 1.50000 std 1.195229 0.57735 min 1.000000 1.00000 0% 1.000000 1.00000 10% 1.000000 1.00000 20% 1.400000 1.00000 30% 2.000000 1.00000 40% 2.000000 1.20000 50% 2.500000 1.50000 60% 3.000000 1.80000 70% 3.000000 2.00000 80% 3.600000 2.00000 90% 4.000000 2.00000 max 4.000000 2.00000 62.4 ms ± 565 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Stats for a single column.
print(ts['a'].vbt.stats())
%timeit big_ts[0].vbt.stats(silence_warnings=True)
# Stats for the whole frame: the recorded output is one aggregated series
# named 'agg_func_mean', and warnings are emitted for 'idx_min' / 'idx_max'
# unless silence_warnings=True is passed (as in the timed calls).
print(ts.vbt.stats())
%timeit big_ts.vbt.stats(silence_warnings=True)
# With a value -> label mapping the output lists "Value Counts: <label>" rows.
print(ts.vbt(
    mapping={x: 'test_' + str(x) for x in np.unique(ts.values.flatten())}
).stats())
%timeit big_ts.vbt(\
    mapping={x: 'test_' + str(x) for x in np.unique(big_ts.values.flatten())}\
).stats(silence_warnings=True)
Start 2018-01-01 00:00:00 End 2018-01-05 00:00:00 Period 5 days 00:00:00 Count 4 Mean 2.5 Std 1.290994 Min 1.0 Median 2.5 Max 4.0 Min Index 2018-01-01 00:00:00 Max Index 2018-01-04 00:00:00 Name: a, dtype: object 5.7 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) Start 2018-01-01 00:00:00 End 2018-01-05 00:00:00 Period 5 days 00:00:00 Count 4.0 Mean 2.166667 Std 1.053113 Min 1.0 Median 2.166667 Max 3.333333 Name: agg_func_mean, dtype: object
/Users/olegpolakow/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: Metric 'idx_min' returned multiple values despite having no aggregation function after removing the cwd from sys.path. /Users/olegpolakow/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: Metric 'idx_max' returned multiple values despite having no aggregation function after removing the cwd from sys.path.
36.8 ms ± 754 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) Start 2018-01-01 00:00:00 End 2018-01-05 00:00:00 Period 5 days 00:00:00 Value Counts: test_1.0 1.333333 Value Counts: test_2.0 1.333333 Value Counts: test_3.0 0.666667 Value Counts: test_4.0 0.666667 Value Counts: test_nan 1.0 Name: agg_func_mean, dtype: object 44.4 ms ± 458 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
print(ts / ts.expanding().max() - 1) %timeit big_ts / big_ts.expanding().max() print(ts.vbt.drawdown()) %timeit big_ts.vbt.drawdown()
a b c 2018-01-01 0.0 NaN 0.0 2018-01-02 0.0 0.00 0.0 2018-01-03 0.0 -0.25 NaN 2018-01-04 0.0 -0.50 0.0 2018-01-05 NaN -0.75 -0.5 36.3 ms ± 399 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) a b c 2018-01-01 0.0 NaN 0.0 2018-01-02 0.0 0.00 0.0 2018-01-03 0.0 -0.25 NaN 2018-01-04 0.0 -0.50 0.0 2018-01-05 NaN -0.75 -0.5
/Users/olegpolakow/Documents/SourceTree/vectorbt/vectorbt/generic/accessors.py:1143: RuntimeWarning: invalid value encountered in true_divide out = self.to_2d_array() / nb.expanding_max_nb(self.to_2d_array()) - 1
4.95 ms ± 616 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Access the `drawdowns` attribute; the recorded output is a Drawdowns object
# holding a wrapper, a records array, and the source frame.
print(ts.vbt.drawdowns)
%timeit big_ts.vbt.drawdowns
Drawdowns(**Config({ "wrapper": "<vectorbt.base.array_wrapper.ArrayWrapper object at 0x7fda7ee48400> of shape (5, 3)", "records_arr": "<numpy.ndarray object at 0x7fd9f8227210> of shape (2,)", "idx_field": "end_idx", "ts": "<pandas.core.frame.DataFrame object at 0x7fda7db4a278> of shape (5, 3)" })) 7.11 ms ± 62.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Convert the frame to a mapped array: flat values plus, for each value, its
# column position (`col_arr`) and row position (`idx_arr`).
# Default call: NaN entries are left out (12 values in the recorded output).
print(ts.vbt.to_mapped().values)
print(ts.vbt.to_mapped().col_arr)
print(ts.vbt.to_mapped().idx_arr)
# The per-column mean must agree between the accessor and the mapped array.
print(ts.vbt.mean(), ts.vbt.to_mapped().mean())
%timeit big_ts.vbt.to_mapped()
# dropna=False: NaN entries are kept (all 15 cells in the recorded output).
print(ts.vbt.to_mapped(dropna=False).values)
print(ts.vbt.to_mapped(dropna=False).col_arr)
print(ts.vbt.to_mapped(dropna=False).idx_arr)
print(ts.vbt.mean(), ts.vbt.to_mapped(dropna=False).mean())
%timeit big_ts.vbt.to_mapped(dropna=False)
[1. 2. 3. 4. 4. 3. 2. 1. 1. 2. 2. 1.] [0 0 0 0 1 1 1 1 2 2 2 2] [0 1 2 3 1 2 3 4 0 1 3 4] a 2.5 b 2.5 c 1.5 Name: mean, dtype: float32 a 2.5 b 2.5 c 1.5 Name: mean, dtype: float64 13.5 ms ± 611 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) [ 1. 2. 3. 4. nan nan 4. 3. 2. 1. 1. 2. nan 2. 1.] [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2] [0 1 2 3 4 0 1 2 3 4 0 1 2 3 4] a 2.5 b 2.5 c 1.5 Name: mean, dtype: float32 a 2.5 b 2.5 c 1.5 Name: mean, dtype: float64 12.6 ms ± 403 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# range_split returns a tuple; element [0] is the frame of split values,
# with one `split_idx` column level per range.
# n=2: two ranges.
print(ts['a'].vbt.range_split(n=2)[0])
%timeit big_ts.iloc[:, 0].vbt.range_split(n=100)
print(ts.vbt.range_split(n=2)[0])
%timeit big_ts.vbt.range_split(n=100)
# range_len=2: every window of two rows (four ranges in the recorded output).
print(ts['a'].vbt.range_split(range_len=2)[0])
%timeit big_ts.iloc[:, 0].vbt.range_split(range_len=100)
print(ts.vbt.range_split(range_len=2)[0])
%timeit big_ts.vbt.range_split(range_len=100)
# Explicit ranges by integer position (rows 0-3 and 1-4 in the output) ...
print(ts.vbt.range_split(start_idxs=[0, 1], end_idxs=[3, 4])[0])
# ... and by index label (rows 0-2 and 1-3 in the output).
print(ts.vbt.range_split(start_idxs=ts.index[[0, 1]], end_idxs=ts.index[[2, 3]])[0])
split_idx 0 1 0 1.0 4.0 1 2.0 NaN 19 ms ± 506 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) split_idx 0 1 a b c a b c 0 1.0 NaN 1.0 4.0 2.0 2.0 1 2.0 4.0 2.0 NaN 1.0 1.0 26.8 ms ± 575 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) split_idx 0 1 2 3 0 1.0 2.0 3.0 4.0 1 2.0 3.0 4.0 NaN 153 ms ± 12.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) split_idx 0 1 2 3 a b c a b c a b c a b c 0 1.0 NaN 1.0 2.0 4.0 2.0 3.0 3.0 NaN 4.0 2.0 2.0 1 2.0 4.0 2.0 3.0 3.0 NaN 4.0 2.0 2.0 NaN 1.0 1.0 874 ms ± 82.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) split_idx 0 1 a b c a b c 0 1.0 NaN 1.0 2.0 4.0 2.0 1 2.0 4.0 2.0 3.0 3.0 NaN 2 3.0 3.0 NaN 4.0 2.0 2.0 3 4.0 2.0 2.0 NaN 1.0 1.0 split_idx 0 1 a b c a b c 0 1.0 NaN 1.0 2.0 4.0 2.0 1 2.0 4.0 2.0 3.0 3.0 NaN 2 3.0 3.0 NaN 4.0 2.0 2.0
# With plot=True, range_split hands back a figure instead of the split data;
# render it as a static SVG.
split_fig = ts.vbt.range_split(
    start_idxs=[0, 1],
    end_idxs=[3, 4],
    plot=True,
)
split_fig.show_svg()
Image in a Jupyter notebook
# plot(): one column first, then every column of the frame.
column_fig = ts['a'].vbt.plot()
column_fig.show_svg()
frame_fig = ts.vbt.plot()
frame_fig.show_svg()
Image in a Jupyter notebookImage in a Jupyter notebook
# scatterplot(): one column first, then every column of the frame.
column_fig = ts['a'].vbt.scatterplot()
column_fig.show_svg()
frame_fig = ts.vbt.scatterplot()
frame_fig.show_svg()
Image in a Jupyter notebookImage in a Jupyter notebook
# lineplot(): one column first, then every column of the frame.
column_fig = ts['a'].vbt.lineplot()
column_fig.show_svg()
frame_fig = ts.vbt.lineplot()
frame_fig.show_svg()
Image in a Jupyter notebookImage in a Jupyter notebook
# barplot(): one column first, then every column of the frame.
column_fig = ts['a'].vbt.barplot()
column_fig.show_svg()
frame_fig = ts.vbt.barplot()
frame_fig.show_svg()
Image in a Jupyter notebookImage in a Jupyter notebook
# histplot(): one column, the whole frame, then the frame grouped by `group_by`.
column_fig = ts['a'].vbt.histplot()
column_fig.show_svg()
frame_fig = ts.vbt.histplot()
frame_fig.show_svg()
grouped_fig = ts.vbt.histplot(group_by=group_by)
grouped_fig.show_svg()
Image in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebook
# boxplot(): one column, the whole frame, then the frame grouped by `group_by`.
column_fig = ts['a'].vbt.boxplot()
column_fig.show_svg()
frame_fig = ts.vbt.boxplot()
frame_fig.show_svg()
grouped_fig = ts.vbt.boxplot(group_by=group_by)
grouped_fig.show_svg()
Image in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebook
# Plot column 'a' against column 'b' and render the figure as SVG.
col_a, col_b = ts['a'], ts['b']
against_fig = col_a.vbt.plot_against(col_b)
against_fig.show_svg()
Image in a Jupyter notebook
# Overlay column 'a' with a heatmap built from column 'b'; render as SVG.
col_a, col_b = ts['a'], ts['b']
overlay_fig = col_a.vbt.overlay_with_heatmap(col_b)
overlay_fig.show_svg()
Image in a Jupyter notebook
# Series with a two-level index (1, 1), (2, 2), (3, 3) and values 0..2,
# drawn as a heatmap.
multi_index = pd.MultiIndex.from_tuples([(k, k) for k in (1, 2, 3)])
sr = pd.Series(data=np.arange(len(multi_index)), index=multi_index)
print(sr)
heatmap_fig = sr.vbt.heatmap()
heatmap_fig.show_svg()
1 1 0 2 2 1 3 3 2 dtype: int64
Image in a Jupyter notebook
# 3x3 frame with 0, 1, 2 on the diagonal and NaN elsewhere, drawn as a heatmap.
df = pd.DataFrame(
    [
        [0, np.nan, np.nan],
        [np.nan, 1, np.nan],
        [np.nan, np.nan, 2],
    ]
)
print(df)
heatmap_fig = df.vbt.heatmap()
heatmap_fig.show_svg()
0 1 2 0 0.0 NaN NaN 1 NaN 1.0 NaN 2 NaN NaN 2.0
Image in a Jupyter notebook
# Series with a three-level index (1, 1, 1), (2, 2, 2), (3, 3, 3) and
# values 0..2, drawn as a volume.
multi_index = pd.MultiIndex.from_tuples([(k, k, k) for k in (1, 2, 3)])
sr = pd.Series(data=np.arange(len(multi_index)), index=multi_index)
print(sr)
volume_fig = sr.vbt.volume()
volume_fig.show_svg()
1 1 1 0 2 2 2 1 3 3 3 2 dtype: int64
/Users/olegpolakow/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: Data contains NaNs. Use `fillna` argument or `show` method in case of visualization issues.
Image in a Jupyter notebook
# Series with a three-level index; index level 0 is passed as `slider_level`
# to the heatmap.
multi_index = pd.MultiIndex.from_tuples([(1, 1, 1), (1, 2, 2), (1, 3, 3), (2, 3, 3), (2, 2, 2), (2, 1, 1)])
sr = pd.Series(np.arange(len(multi_index)), index=multi_index)
print(sr)
# Do not bind the result of `.show()`: it is not the figure, and the name was
# never used afterwards. This mirrors the volume cell with `slider_level=0`.
sr.vbt.heatmap(slider_level=0).show()
1 1 1 0 2 2 1 3 3 2 2 3 3 3 2 2 4 1 1 5 dtype: int64
# Series with a four-level index; index level 0 is passed as `slider_level`
# to the volume plot.
multi_index = pd.MultiIndex.from_tuples(
    [
        (1, 1, 1, 1),
        (1, 2, 2, 2),
        (1, 3, 3, 3),
        (2, 3, 3, 3),
        (2, 2, 2, 2),
        (2, 1, 1, 1),
    ]
)
sr = pd.Series(data=np.arange(len(multi_index)), index=multi_index)
print(sr)
volume_fig = sr.vbt.volume(slider_level=0)
volume_fig.show()
1 1 1 1 0 2 2 2 1 3 3 3 2 2 3 3 3 3 2 2 2 4 1 1 1 5 dtype: int64
/Users/olegpolakow/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: Data contains NaNs. Use `fillna` argument or `show` method in case of visualization issues.
# qqplot() of column 'a', rendered as SVG.
qq_fig = ts['a'].vbt.qqplot()
qq_fig.show_svg()
Image in a Jupyter notebook