from datetime import datetime, timedelta, timezone
import numpy as np
import pandas as pd
import pytest
import pytz
import vectorbt as vbt
from vectorbt.utils.config import merge_dicts
from vectorbt.utils.datetime_ import to_timezone
# Base RNG seed: the default `seed` argument of `MyData.download_symbol` and the
# fallback that `MyData.update_symbol` increments by one when no seed was stored.
seed = 42
def setup_module():
    """Prepare vectorbt's global settings before the tests in this module run.

    Turns on the numba ``check_func_suffix`` setting, disables caching and
    empties both the caching whitelist and blacklist.
    """
    settings = vbt.settings
    settings.numba['check_func_suffix'] = True

    caching = settings.caching
    caching.enabled = False
    caching.whitelist = []
    caching.blacklist = []
def teardown_module():
    """Reset vectorbt's global settings once the tests in this module are done."""
    settings = vbt.settings
    settings.reset()
class MyData(vbt.Data):
    """Synthetic data source used by the tests: seeded uniform noise offset by the symbol."""

    @classmethod
    def download_symbol(cls, symbol, shape=(5, 3), start_date=datetime(2020, 1, 1), columns=None, index_mask=None,
                        column_mask=None, return_arr=False, tz_localize=None, seed=seed):
        """Generate reproducible random data for one symbol.

        Args:
            symbol: Number added to every generated value.
            shape: Shape of the generated array (1-dim or 2-dim).
            start_date: First timestamp of the daily index.
            columns: Series name (1-dim) or DataFrame columns (2-dim).
            index_mask: Optional selector applied to the rows via ``.loc``.
            column_mask: Optional selector applied to the columns via ``.loc`` (2-dim only).
            return_arr: If True, return the raw NumPy array without an index.
            tz_localize: Optional timezone passed to ``tz_localize``.
            seed: Seed handed to ``np.random.seed``.

        Returns:
            The raw array if ``return_arr`` is set, otherwise a Series (1-dim)
            or a DataFrame (2-dim) indexed by consecutive days.
        """
        np.random.seed(seed)
        values = np.random.uniform(size=shape) + symbol
        if return_arr:
            return values

        dates = [start_date + timedelta(days=offset) for offset in range(values.shape[0])]
        is_1d = values.ndim == 1
        if is_1d:
            obj = pd.Series(values, index=dates, name=columns)
        else:
            obj = pd.DataFrame(values, index=dates, columns=columns)

        if index_mask is not None:
            obj = obj.loc[index_mask]
        # A column mask only makes sense for two-dimensional data.
        if not is_1d and column_mask is not None:
            obj = obj.loc[:, column_mask]
        if tz_localize is not None:
            obj = obj.tz_localize(tz_localize)
        return obj

    def update_symbol(self, symbol, n=1, **kwargs):
        """Generate ``n`` new rows for ``symbol``, starting at its last stored timestamp.

        The keyword arguments recorded at download time are reused: ``shape`` is
        replaced by one with ``n`` rows, ``seed`` is incremented by one, and
        anything passed in ``kwargs`` is merged on top.
        """
        stored = self.select_symbol_kwargs(symbol, self.download_kwargs)
        stored['start_date'] = self.data[symbol].index[-1]

        old_shape = stored.pop('shape', (5, 3))
        if len(old_shape) > 1:
            new_shape = (n, old_shape[1])
        else:
            new_shape = (n,)

        next_seed = stored.pop('seed', seed) + 1
        merged = merge_dicts(stored, kwargs)
        return self.download_symbol(symbol, shape=new_shape, seed=next_seed, **merged)
class TestData:
    """Checks ``MyData`` downloading, updating, concatenation, selection, indexing and stats."""

    # Values the assertions below expect from ``MyData.download`` for symbol 0
    # (``shape=(5,)`` and ``shape=(5, 3)``) and the counterparts expected for symbol 1.
    _SR0 = [
        0.3745401188473625,
        0.9507143064099162,
        0.7319939418114051,
        0.5986584841970366,
        0.15601864044243652,
    ]
    _SR1 = [
        1.3745401188473625,
        1.9507143064099162,
        1.7319939418114051,
        1.5986584841970366,
        1.15601864044243652,
    ]
    _DF0 = [
        [0.3745401188473625, 0.9507143064099162, 0.7319939418114051],
        [0.5986584841970366, 0.15601864044243652, 0.15599452033620265],
        [0.05808361216819946, 0.8661761457749352, 0.6011150117432088],
        [0.7080725777960455, 0.020584494295802447, 0.9699098521619943],
        [0.8324426408004217, 0.21233911067827616, 0.18182496720710062],
    ]
    _DF1 = [
        [1.3745401188473625, 1.9507143064099162, 1.7319939418114051],
        [1.5986584841970366, 1.15601864044243652, 1.15599452033620265],
        [1.05808361216819946, 1.8661761457749352, 1.6011150117432088],
        [1.7080725777960455, 1.020584494295802447, 1.9699098521619943],
        [1.8324426408004217, 1.21233911067827616, 1.18182496720710062],
    ]
    _FEATS = ('feat0', 'feat1', 'feat2')

    @staticmethod
    def _daily_index(periods=5, tz=timezone.utc):
        """Build a daily ``DatetimeIndex`` of ``periods`` days starting on 2020-01-01."""
        stamps = ['2020-01-%02d 00:00:00' % day for day in range(1, periods + 1)]
        return pd.DatetimeIndex(stamps, freq='D', tz=tz)

    @staticmethod
    def _symbol_columns():
        """Build the columns expected when the symbols 0 and 1 are laid out side by side."""
        return pd.Index([0, 1], dtype='int64', name='symbol')

    def test_config(self, tmp_path):
        """Round-trip a data instance through ``dumps``/``loads`` and ``save``/``load``."""
        data = MyData.download([0, 1], shape=(5, 3), columns=list(self._FEATS))
        restored = MyData.loads(data.dumps())
        assert restored == data

        target = tmp_path / 'data'
        data.save(target)
        assert MyData.load(target) == data

    def test_download(self):
        """Compare downloaded data with the expected values for various download options."""
        sr_eq = pd.testing.assert_series_equal
        df_eq = pd.testing.assert_frame_equal
        sr0, sr1, df0, df1 = self._SR0, self._SR1, self._DF0, self._DF1
        feats = list(self._FEATS)

        # return_arr=True: the expected objects carry no explicit index.
        sr_eq(MyData.download(0, shape=(5,), return_arr=True).data[0], pd.Series(sr0))
        df_eq(MyData.download(0, shape=(5, 3), return_arr=True).data[0], pd.DataFrame(df0))

        # Single symbol with a datetime index, with and without names.
        index = self._daily_index()
        sr_eq(MyData.download(0, shape=(5,)).data[0], pd.Series(sr0, index=index))
        sr_eq(
            MyData.download(0, shape=(5,), columns='feat0').data[0],
            pd.Series(sr0, index=index, name='feat0'),
        )
        df_eq(MyData.download(0, shape=(5, 3)).data[0], pd.DataFrame(df0, index=index))
        df_eq(
            MyData.download(0, shape=(5, 3), columns=feats).data[0],
            pd.DataFrame(df0, index=index, columns=pd.Index(feats, dtype='object')),
        )

        # Two symbols.
        sr_eq(MyData.download([0, 1], shape=(5,)).data[0], pd.Series(sr0, index=index))
        sr_eq(MyData.download([0, 1], shape=(5,)).data[1], pd.Series(sr1, index=index))
        df_eq(MyData.download([0, 1], shape=(5, 3)).data[0], pd.DataFrame(df0, index=index))
        df_eq(MyData.download([0, 1], shape=(5, 3)).data[1], pd.DataFrame(df1, index=index))

        # Timezone localization followed by conversion.
        berlin_index = self._daily_index(tz=pytz.utc).tz_convert(to_timezone('Europe/Berlin'))
        sr_eq(
            MyData.download(0, shape=(5,), tz_localize='UTC', tz_convert='Europe/Berlin').data[0],
            pd.Series(sr0, index=berlin_index),
        )

        # Symbols whose rows/columns only partially overlap.
        index_mask = vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False],
        })
        column_mask = vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False],
        })

        def masked_1d(missing):
            return MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index=missing)

        def masked_2d(missing):
            return MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                   missing_index=missing, missing_columns=missing)

        sr_eq(masked_1d('nan').data[0], pd.Series([np.nan] + sr0[1:], index=index))
        sr_eq(masked_1d('nan').data[1], pd.Series(sr1[:4] + [np.nan], index=index))
        sr_eq(masked_1d('drop').data[0], pd.Series(sr0[1:4], index=index[1:4]))
        sr_eq(masked_1d('drop').data[1], pd.Series(sr1[1:4], index=index[1:4]))

        nan_row = [np.nan, np.nan, np.nan]
        df_eq(
            masked_2d('nan').data[0],
            pd.DataFrame([nan_row] + [[np.nan] + row[1:] for row in df0[1:]], index=index),
        )
        df_eq(
            masked_2d('nan').data[1],
            pd.DataFrame([row[:2] + [np.nan] for row in df1[:4]] + [nan_row], index=index),
        )
        df_eq(
            masked_2d('drop').data[0],
            pd.DataFrame([[row[1]] for row in df0[1:4]], index=index[1:4], columns=pd.Index([1], dtype='int64')),
        )
        df_eq(
            masked_2d('drop').data[1],
            pd.DataFrame([[row[1]] for row in df1[1:4]], index=index[1:4], columns=pd.Index([1], dtype='int64')),
        )

        # 'raise' and unknown modes must fail for either axis.
        failing_modes = [('raise', 'nan'), ('nan', 'raise'), ('test', 'nan'), ('nan', 'test')]
        for index_mode, columns_mode in failing_modes:
            with pytest.raises(Exception):
                MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                missing_index=index_mode, missing_columns=columns_mode)

    def test_update(self):
        """Compare updated data with the expected values for various download/update options."""
        sr_eq = pd.testing.assert_series_equal
        df_eq = pd.testing.assert_frame_equal
        sr0, sr1, df0, df1 = self._SR0, self._SR1, self._DF0, self._DF1

        # Values expected for the rows produced by an update of symbol 0.
        new_sr = [0.11505456638977896, 0.6090665392794814]
        new_df = [
            [0.11505456638977896, 0.6090665392794814, 0.13339096418598828],
            [0.24058961996534878, 0.3271390558111398, 0.8591374909485977],
        ]

        # return_arr=True: the expected objects carry no explicit index.
        sr_eq(
            MyData.download(0, shape=(5,), return_arr=True).update().data[0],
            pd.Series(sr0[:4] + new_sr[:1]),
        )
        sr_eq(
            MyData.download(0, shape=(5,), return_arr=True).update(n=2).data[0],
            pd.Series(sr0[:4] + new_sr),
        )
        df_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).update().data[0],
            pd.DataFrame(df0[:4] + new_df[:1]),
        )
        df_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).update(n=2).data[0],
            pd.DataFrame(df0[:4] + new_df),
        )

        # Datetime-indexed data: one new row overwrites the last, two extend by a day.
        index = self._daily_index()
        sr_eq(
            MyData.download(0, shape=(5,)).update().data[0],
            pd.Series(sr0[:4] + new_sr[:1], index=index),
        )
        updated_index = self._daily_index(6)
        sr_eq(
            MyData.download(0, shape=(5,)).update(n=2).data[0],
            pd.Series(sr0[:4] + new_sr, index=updated_index),
        )

        berlin_index = self._daily_index(tz=pytz.utc).tz_convert(to_timezone('Europe/Berlin'))
        sr_eq(
            MyData.download(0, shape=(5,), tz_localize='UTC', tz_convert='Europe/Berlin')
            .update(tz_localize=None).data[0],
            pd.Series(sr0[:4] + new_sr[:1], index=berlin_index),
        )

        index_mask = vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False],
        })
        column_mask = vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False],
        })
        update_index_mask = vbt.symbol_dict({
            0: [True],
            1: [False],
        })
        update_index_mask2 = vbt.symbol_dict({
            0: [True, False],
            1: [False, True],
        })
        one_row = dict(index_mask=update_index_mask)
        two_rows = dict(n=2, index_mask=update_index_mask2)

        def masked_1d(missing):
            return MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index=missing)

        def masked_2d(missing):
            return MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                   missing_index=missing, missing_columns=missing)

        # One-dimensional data, missing index filled with NaN.
        sr_eq(
            masked_1d('nan').update(**one_row).data[0],
            pd.Series([np.nan] + sr0[1:4] + new_sr[:1], index=index),
        )
        sr_eq(
            masked_1d('nan').update(**one_row).data[1],
            pd.Series(sr1[:4] + [np.nan], index=index),
        )
        sr_eq(
            masked_1d('nan').update(**two_rows).data[0],
            pd.Series([np.nan] + sr0[1:4] + [new_sr[0], np.nan], index=updated_index),
        )
        sr_eq(
            masked_1d('nan').update(**two_rows).data[1],
            pd.Series(sr1[:4] + [np.nan, 1.6090665392794814], index=updated_index),
        )

        # One-dimensional data, missing index dropped: the expected result is the same for both updates.
        for update_kwargs in (one_row, two_rows):
            sr_eq(
                masked_1d('drop').update(**update_kwargs).data[0],
                pd.Series(sr0[1:4], index=index[1:4]),
            )
            sr_eq(
                masked_1d('drop').update(**update_kwargs).data[1],
                pd.Series(sr1[1:4], index=index[1:4]),
            )

        # Two-dimensional data, missing index/columns filled with NaN.
        nan_row = [np.nan, np.nan, np.nan]
        nan_rows0 = [nan_row] + [[np.nan] + row[1:] for row in df0[1:4]] + [[np.nan] + new_df[0][1:]]
        nan_rows1 = [row[:2] + [np.nan] for row in df1[:4]] + [nan_row]
        df_eq(
            masked_2d('nan').update(**one_row).data[0],
            pd.DataFrame(nan_rows0, index=index),
        )
        df_eq(
            masked_2d('nan').update(**one_row).data[1],
            pd.DataFrame(nan_rows1, index=index),
        )
        df_eq(
            masked_2d('nan').update(**two_rows).data[0],
            pd.DataFrame(nan_rows0 + [nan_row], index=updated_index),
        )
        df_eq(
            masked_2d('nan').update(**two_rows).data[1],
            pd.DataFrame(nan_rows1 + [[1.2405896199653488, 1.3271390558111398, np.nan]], index=updated_index),
        )

        # Two-dimensional data, missing index/columns dropped: same expectation for both updates.
        for update_kwargs in (one_row, two_rows):
            df_eq(
                masked_2d('drop').update(**update_kwargs).data[0],
                pd.DataFrame([[row[1]] for row in df0[1:4]], index=index[1:4],
                             columns=pd.Index([1], dtype='int64')),
            )
            df_eq(
                masked_2d('drop').update(**update_kwargs).data[1],
                pd.DataFrame([[row[1]] for row in df1[1:4]], index=index[1:4],
                             columns=pd.Index([1], dtype='int64')),
            )

    def test_concat(self):
        """Compare the per-feature objects returned by ``concat`` with the expected values."""
        sr_eq = pd.testing.assert_series_equal
        df_eq = pd.testing.assert_frame_equal
        sr0, sr1, df0, df1 = self._SR0, self._SR1, self._DF0, self._DF1
        feats = list(self._FEATS)
        index = self._daily_index()

        sr_eq(
            MyData.download(0, shape=(5,), columns='feat0').concat()['feat0'],
            pd.Series(sr0, index=index, name=0),
        )
        df_eq(
            MyData.download([0, 1], shape=(5,), columns='feat0').concat()['feat0'],
            pd.DataFrame([[left, right] for left, right in zip(sr0, sr1)], index=index,
                         columns=self._symbol_columns()),
        )

        # One symbol: each feature comes back as a series named after the symbol.
        for pos, feat in enumerate(feats):
            sr_eq(
                MyData.download(0, shape=(5, 3), columns=feats).concat()[feat],
                pd.Series([row[pos] for row in df0], index=index, name=0),
            )

        # Two symbols: each feature comes back as a frame with one column per symbol.
        for pos, feat in enumerate(feats):
            df_eq(
                MyData.download([0, 1], shape=(5, 3), columns=feats).concat()[feat],
                pd.DataFrame([[left[pos], right[pos]] for left, right in zip(df0, df1)], index=index,
                             columns=self._symbol_columns()),
            )

    def test_get(self):
        """Compare the objects returned by ``get`` with the expected values."""
        sr_eq = pd.testing.assert_series_equal
        df_eq = pd.testing.assert_frame_equal
        sr0, sr1, df0, df1 = self._SR0, self._SR1, self._DF0, self._DF1
        feats = list(self._FEATS)
        index = self._daily_index()

        def symbol_frame(rows):
            return pd.DataFrame(rows, index=index, columns=self._symbol_columns())

        # Single symbol.
        sr_eq(
            MyData.download(0, shape=(5,), columns='feat0').get(),
            pd.Series(sr0, index=index, name='feat0'),
        )
        df_eq(
            MyData.download(0, shape=(5, 3), columns=feats).get(),
            pd.DataFrame(df0, index=index, columns=pd.Index(feats, dtype='object')),
        )
        sr_eq(
            MyData.download(0, shape=(5, 3), columns=feats).get('feat0'),
            pd.Series([row[0] for row in df0], index=index, name='feat0'),
        )

        # Two symbols.
        df_eq(
            MyData.download([0, 1], shape=(5,), columns='feat0').get(),
            symbol_frame([[left, right] for left, right in zip(sr0, sr1)]),
        )
        feat0_rows = [[left[0], right[0]] for left, right in zip(df0, df1)]
        df_eq(
            MyData.download([0, 1], shape=(5, 3), columns=feats).get('feat0'),
            symbol_frame(feat0_rows),
        )
        df_eq(
            MyData.download([0, 1], shape=(5, 3), columns=feats).get(['feat0', 'feat1'])[0],
            symbol_frame(feat0_rows),
        )
        df_eq(
            MyData.download([0, 1], shape=(5, 3), columns=feats).get()[0],
            symbol_frame(feat0_rows),
        )

    def test_indexing(self):
        """Indexing a data instance must yield the wrapper of an equally shaped download."""
        feats = list(self._FEATS)

        # Row selection on one-dimensional data.
        selected = MyData.download([0, 1], shape=(5,), columns='feat0').iloc[:3].wrapper
        reference = MyData.download([0, 1], shape=(3,), columns='feat0').wrapper
        assert selected == reference

        # Row selection on two-dimensional data.
        selected = MyData.download([0, 1], shape=(5, 3), columns=feats).iloc[:3].wrapper
        reference = MyData.download([0, 1], shape=(3, 3), columns=feats).wrapper
        assert selected == reference

        # Selecting a single column by label.
        selected = MyData.download([0, 1], shape=(5, 3), columns=feats)['feat0'].wrapper
        reference = MyData.download([0, 1], shape=(5,), columns='feat0').wrapper
        assert selected == reference

        # Selecting a list with a single column label.
        selected = MyData.download([0, 1], shape=(5, 3), columns=feats)[['feat0']].wrapper
        reference = MyData.download([0, 1], shape=(5, 1), columns=['feat0']).wrapper
        assert selected == reference

    def test_stats(self):
        """Compare ``stats`` output for aggregated, per-column, grouped and raw modes."""
        sr_eq = pd.testing.assert_series_equal
        index_mask = vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False],
        })
        column_mask = vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False],
        })
        data = MyData.download(
            [0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
            missing_index='nan', missing_columns='nan', columns=list(self._FEATS))

        stats_index = pd.Index([
            'Start', 'End', 'Period', 'Total Symbols', 'Null Counts: 0', 'Null Counts: 1'
        ], dtype='object')

        def expected_stats(name, null_counts):
            # Only the two null counts and the series name differ between the expectations.
            leading = [
                pd.Timestamp('2020-01-01 00:00:00+0000', tz='UTC'),
                pd.Timestamp('2020-01-05 00:00:00+0000', tz='UTC'),
                pd.Timedelta('5 days 00:00:00'),
                2,
            ]
            return pd.Series(leading + list(null_counts), index=stats_index, name=name)

        sr_eq(data.stats(), expected_stats('agg_func_mean', [2.3333333333333335, 2.3333333333333335]))
        sr_eq(data.stats(column='feat0'), expected_stats('feat0', [5, 1]))
        sr_eq(data.stats(group_by=True), expected_stats('group', [7, 7]))

        # Selecting or grouping first must give the same result as passing the option to stats().
        sr_eq(data['feat0'].stats(), data.stats(column='feat0'))
        sr_eq(
            data.replace(wrapper=data.wrapper.replace(group_by=True)).stats(),
            data.stats(group_by=True),
        )

        # Without aggregation there is one row per column and one column per metric.
        stats_df = data.stats(agg_func=None)
        assert stats_df.shape == (3, 6)
        pd.testing.assert_index_equal(stats_df.index, data.wrapper.columns)
        pd.testing.assert_index_equal(stats_df.columns, stats_index)
class TestDataUpdater:
    """Checks that ``vbt.DataUpdater`` keeps its data in step with ``Data.update``."""

    def test_update(self):
        """A single ``update`` call must match one manual update of the data."""
        source = MyData.download(0, shape=(5,), return_arr=True)
        updater = vbt.DataUpdater(source)
        updater.update()

        assert updater.data == source.update()
        assert updater.config['data'] == source.update()

    def test_update_every(self):
        """Five scheduled updates must match five manual updates of the data."""
        data = MyData.download(0, shape=(5,), return_arr=True)
        counter = dict(call_count=0)

        class DataUpdater(vbt.DataUpdater):
            def update(self, kwargs):
                # Count the calls in the shared dict and cancel the schedule on the fifth one.
                super().update()
                kwargs['call_count'] += 1
                if kwargs['call_count'] == 5:
                    raise vbt.CancelledError

        updater = DataUpdater(data)
        updater.update_every(kwargs=counter)

        for _ in range(5):
            data = data.update()
        assert updater.data == data
        assert updater.config['data'] == data