Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
polakowo
GitHub Repository: polakowo/vectorbt
Path: blob/master/tests/test_generic.py
1145 views
1
from datetime import datetime
2
from itertools import product
3
4
import numpy as np
5
import pandas as pd
6
import pytest
7
from numba import njit
8
from sklearn.model_selection import TimeSeriesSplit
9
10
import vectorbt as vbt
11
from vectorbt.generic import nb
12
13
seed = 42
14
15
16
def pandas_applymap(df: pd.DataFrame, func):
    """Apply `func` to every element of `df` using pandas' own mapper.

    `DataFrame.map` is used when the installed pandas exposes it;
    otherwise the call falls back to `DataFrame.applymap`.

    Args:
        df: Frame whose elements are transformed.
        func: Callable applied to each element.

    Returns:
        Whatever the selected pandas method returns for `func`.
    """
    if hasattr(pd.DataFrame, 'map'):
        return df.map(func)
    return df.applymap(func)
21
22
# 86400000000000 is the number of nanoseconds in one day. The timedelta64 is
# created without an explicit unit; tests multiply numeric expectations by it
# when they request `to_timedelta=True`.
day_dt = np.timedelta64(86400000000000)

# Shared fixture: three float columns over five consecutive days, each column
# holding exactly one NaN (last row of 'a', first row of 'b', middle of 'c').
df = pd.DataFrame({
    'a': [1, 2, 3, 4, np.nan],
    'b': [np.nan, 4, 3, 2, 1],
    'c': [1, 2, np.nan, 2, 1]
}, index=pd.DatetimeIndex([
    datetime(2018, 1, 1),
    datetime(2018, 1, 2),
    datetime(2018, 1, 3),
    datetime(2018, 1, 4),
    datetime(2018, 1, 5)
]))

# Column-to-group labels: 'a' and 'b' belong to 'g1', 'c' to 'g2'.
group_by = np.array(['g1', 'g1', 'g2'])
36
37
38
@njit
def i_or_col_pow_nb(i_or_col, x, pow):
    """Raise `x` to the power `pow`; the leading index argument is unused."""
    return np.power(x, pow)
41
42
43
@njit
def pow_nb(x, pow):
    """Raise `x` to the power `pow` via `np.power`."""
    return np.power(x, pow)
46
47
48
@njit
def nanmean_nb(x):
    """Return `np.nanmean(x)`."""
    return np.nanmean(x)
51
52
53
@njit
def i_col_nanmean_nb(i, col, x):
    """Return `np.nanmean(x)`; the `i` and `col` arguments are unused."""
    return np.nanmean(x)
56
57
58
@njit
def i_nanmean_nb(i, x):
    """Return `np.nanmean(x)`; the `i` argument is unused."""
    return np.nanmean(x)
61
62
63
@njit
def col_nanmean_nb(col, x):
    """Return `np.nanmean(x)`; the `col` argument is unused."""
    return np.nanmean(x)
66
67
68
# ############# Global ############# #
69
70
def setup_module():
    """Module-level setup hook: adjust vectorbt settings for these tests.

    Turns on the Numba function-suffix check, disables caching and empties
    the caching whitelist and blacklist.
    """
    vbt.settings.numba['check_func_suffix'] = True
    vbt.settings.caching.enabled = False
    vbt.settings.caching.whitelist = []
    vbt.settings.caching.blacklist = []
75
76
77
def teardown_module():
    """Module-level teardown hook: reset vectorbt settings."""
    vbt.settings.reset()
79
80
81
# ############# accessors.py ############# #
82
83
84
class TestAccessors:
85
def test_indexing(self):
    """Indexing the accessor by column must match the column's accessor."""
    assert df.vbt['a'].min() == df['a'].vbt.min()
87
88
def test_set_by_mask(self):
    """Check the `set_by_mask*` Numba helpers for 1-dim and 2-dim input.

    Each helper is exercised twice: once with an integer replacement and
    once with a float replacement.
    """
    # Scalar replacement, 1-dim
    np.testing.assert_array_equal(
        nb.set_by_mask_1d_nb(
            np.array([1, 2, 3, 1, 2, 3]),
            np.array([True, False, False, True, False, False]),
            0
        ),
        np.array([0, 2, 3, 0, 2, 3])
    )
    np.testing.assert_array_equal(
        nb.set_by_mask_1d_nb(
            np.array([1, 2, 3, 1, 2, 3]),
            np.array([True, False, False, True, False, False]),
            0.
        ),
        np.array([0., 2., 3., 0., 2., 3.])
    )
    # Scalar replacement, 2-dim (single column)
    np.testing.assert_array_equal(
        nb.set_by_mask_nb(
            np.array([1, 2, 3, 1, 2, 3])[:, None],
            np.array([True, False, False, True, False, False])[:, None],
            0
        ),
        np.array([0, 2, 3, 0, 2, 3])[:, None]
    )
    np.testing.assert_array_equal(
        nb.set_by_mask_nb(
            np.array([1, 2, 3, 1, 2, 3])[:, None],
            np.array([True, False, False, True, False, False])[:, None],
            0.
        ),
        np.array([0., 2., 3., 0., 2., 3.])[:, None]
    )
    # Array replacement, 1-dim
    np.testing.assert_array_equal(
        nb.set_by_mask_mult_1d_nb(
            np.array([1, 2, 3, 1, 2, 3]),
            np.array([True, False, False, True, False, False]),
            np.array([0, -1, -1, 0, -1, -1])
        ),
        np.array([0, 2, 3, 0, 2, 3])
    )
    np.testing.assert_array_equal(
        nb.set_by_mask_mult_1d_nb(
            np.array([1, 2, 3, 1, 2, 3]),
            np.array([True, False, False, True, False, False]),
            np.array([0., -1., -1., 0., -1., -1.])
        ),
        np.array([0., 2., 3., 0., 2., 3.])
    )
    # Array replacement, 2-dim (single column)
    np.testing.assert_array_equal(
        nb.set_by_mask_mult_nb(
            np.array([1, 2, 3, 1, 2, 3])[:, None],
            np.array([True, False, False, True, False, False])[:, None],
            np.array([0, -1, -1, 0, -1, -1])[:, None]
        ),
        np.array([0, 2, 3, 0, 2, 3])[:, None]
    )
    np.testing.assert_array_equal(
        nb.set_by_mask_mult_nb(
            np.array([1, 2, 3, 1, 2, 3])[:, None],
            np.array([True, False, False, True, False, False])[:, None],
            np.array([0., -1., -1., 0., -1., -1.])[:, None]
        ),
        np.array([0., 2., 3., 0., 2., 3.])[:, None]
    )
153
154
def test_shuffle(self):
    """Check seeded shuffling of a series and a frame against fixed output."""
    pd.testing.assert_series_equal(
        df['a'].vbt.shuffle(seed=seed),
        pd.Series(
            np.array([2.0, np.nan, 3.0, 1.0, 4.0]),
            index=df['a'].index,
            name=df['a'].name
        )
    )
    # The accessor and the Numba function must agree for the same seed
    np.testing.assert_array_equal(
        df['a'].vbt.shuffle(seed=seed).values,
        nb.shuffle_1d_nb(df['a'].values, seed=seed)
    )
    pd.testing.assert_frame_equal(
        df.vbt.shuffle(seed=seed),
        pd.DataFrame(
            np.array([
                [2., 2., 2.],
                [np.nan, 4., 1.],
                [3., 3., 2.],
                [1., np.nan, 1.],
                [4., 1., np.nan]
            ]),
            index=df.index,
            columns=df.columns
        )
    )
181
182
@pytest.mark.parametrize(
    "test_value",
    [-1, 0., np.nan],
)
def test_fillna(self, test_value):
    """Compare `vbt.fillna` with pandas `fillna`, including NaN-free input."""
    pd.testing.assert_series_equal(df['a'].vbt.fillna(test_value), df['a'].fillna(test_value))
    pd.testing.assert_frame_equal(df.vbt.fillna(test_value), df.fillna(test_value))
    # Integer and boolean series without missing values must come back unchanged
    pd.testing.assert_series_equal(
        pd.Series([1, 2, 3]).vbt.fillna(-1),
        pd.Series([1, 2, 3]))
    pd.testing.assert_series_equal(
        pd.Series([False, True, False]).vbt.fillna(False),
        pd.Series([False, True, False]))
195
196
@pytest.mark.parametrize(
    "test_n",
    [1, 2, 3, 4, 5],
)
def test_bshift(self, test_n):
    """Compare backward shifting with pandas `shift(-n)` and custom fill values."""
    pd.testing.assert_series_equal(df['a'].vbt.bshift(test_n), df['a'].shift(-test_n))
    np.testing.assert_array_equal(
        df['a'].vbt.bshift(test_n).values,
        nb.bshift_1d_nb(df['a'].values, test_n)
    )
    pd.testing.assert_frame_equal(df.vbt.bshift(test_n), df.shift(-test_n))
    # Explicit fill values for integer and boolean data
    pd.testing.assert_series_equal(
        pd.Series([1, 2, 3]).vbt.bshift(1, fill_value=-1),
        pd.Series([2, 3, -1])
    )
    pd.testing.assert_series_equal(
        pd.Series([True, True, True]).vbt.bshift(1, fill_value=False),
        pd.Series([True, True, False])
    )
215
216
@pytest.mark.parametrize(
    "test_n",
    [1, 2, 3, 4, 5],
)
def test_fshift(self, test_n):
    """Compare forward shifting with pandas `shift(n)` and custom fill values."""
    pd.testing.assert_series_equal(df['a'].vbt.fshift(test_n), df['a'].shift(test_n))
    np.testing.assert_array_equal(
        df['a'].vbt.fshift(test_n).values,
        nb.fshift_1d_nb(df['a'].values, test_n)
    )
    pd.testing.assert_frame_equal(df.vbt.fshift(test_n), df.shift(test_n))
    # Explicit fill values for integer and boolean data
    pd.testing.assert_series_equal(
        pd.Series([1, 2, 3]).vbt.fshift(1, fill_value=-1),
        pd.Series([-1, 1, 2])
    )
    pd.testing.assert_series_equal(
        pd.Series([True, True, True]).vbt.fshift(1, fill_value=False),
        pd.Series([False, True, True])
    )
235
236
def test_diff(self):
    """Compare `vbt.diff` with pandas `diff` and with `nb.diff_1d_nb`."""
    pd.testing.assert_series_equal(df['a'].vbt.diff(), df['a'].diff())
    np.testing.assert_array_equal(df['a'].vbt.diff().values, nb.diff_1d_nb(df['a'].values))
    pd.testing.assert_frame_equal(df.vbt.diff(), df.diff())
240
241
def test_pct_change(self):
    """Compare `vbt.pct_change` with pandas `pct_change(fill_method=None)`."""
    pd.testing.assert_series_equal(df['a'].vbt.pct_change(), df['a'].pct_change(fill_method=None))
    np.testing.assert_array_equal(df['a'].vbt.pct_change().values, nb.pct_change_1d_nb(df['a'].values))
    pd.testing.assert_frame_equal(df.vbt.pct_change(), df.pct_change(fill_method=None))
245
246
def test_bfill(self):
    """Compare `vbt.bfill` with pandas `bfill` for a series and a frame."""
    pd.testing.assert_series_equal(df['b'].vbt.bfill(), df['b'].bfill())
    pd.testing.assert_frame_equal(df.vbt.bfill(), df.bfill())
249
250
def test_ffill(self):
    """Compare `vbt.ffill` with pandas `ffill` for a series and a frame."""
    pd.testing.assert_series_equal(df['a'].vbt.ffill(), df['a'].ffill())
    pd.testing.assert_frame_equal(df.vbt.ffill(), df.ffill())
253
254
def test_product(self):
    """Compare `vbt.product` with pandas `product`."""
    assert df['a'].vbt.product() == df['a'].product()
    np.testing.assert_array_equal(df.vbt.product(), df.product())
257
258
def test_cumsum(self):
    """Compare `vbt.cumsum` with pandas `cumsum` forward-filled and zero-filled."""
    pd.testing.assert_series_equal(df['a'].vbt.cumsum(), df['a'].cumsum().ffill().fillna(0))
    pd.testing.assert_frame_equal(df.vbt.cumsum(), df.cumsum().ffill().fillna(0))
261
262
def test_cumprod(self):
    """Compare `vbt.cumprod` with pandas `cumprod` forward-filled and one-filled."""
    pd.testing.assert_series_equal(df['a'].vbt.cumprod(), df['a'].cumprod().ffill().fillna(1))
    pd.testing.assert_frame_equal(df.vbt.cumprod(), df.cumprod().ffill().fillna(1))
265
266
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_min(self, test_window, test_minp):
    """Compare `vbt.rolling_min` with pandas `rolling(...).min()`."""
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.rolling_min(test_window, minp=test_minp),
        df['a'].rolling(test_window, min_periods=test_minp).min()
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_min(test_window, minp=test_minp),
        df.rolling(test_window, min_periods=test_minp).min()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.rolling_min(test_window),
        df.rolling(test_window).min()
    )
285
286
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_max(self, test_window, test_minp):
    """Compare `vbt.rolling_max` with pandas `rolling(...).max()`."""
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.rolling_max(test_window, minp=test_minp),
        df['a'].rolling(test_window, min_periods=test_minp).max()
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_max(test_window, minp=test_minp),
        df.rolling(test_window, min_periods=test_minp).max()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.rolling_max(test_window),
        df.rolling(test_window).max()
    )
305
306
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_mean(self, test_window, test_minp):
    """Compare `vbt.rolling_mean` with pandas `rolling(...).mean()`."""
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.rolling_mean(test_window, minp=test_minp),
        df['a'].rolling(test_window, min_periods=test_minp).mean()
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_mean(test_window, minp=test_minp),
        df.rolling(test_window, min_periods=test_minp).mean()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.rolling_mean(test_window),
        df.rolling(test_window).mean()
    )
325
326
@pytest.mark.parametrize(
    "test_window,test_minp,test_ddof",
    list(product([1, 2, 3, 4, 5], [1, None], [0, 1]))
)
def test_rolling_std(self, test_window, test_minp, test_ddof):
    """Compare `vbt.rolling_std` with pandas `rolling(...).std(ddof=...)`."""
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.rolling_std(test_window, minp=test_minp, ddof=test_ddof),
        df['a'].rolling(test_window, min_periods=test_minp).std(ddof=test_ddof)
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_std(test_window, minp=test_minp, ddof=test_ddof),
        df.rolling(test_window, min_periods=test_minp).std(ddof=test_ddof)
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.rolling_std(test_window),
        df.rolling(test_window).std()
    )
345
346
@pytest.mark.parametrize(
    "test_window,test_minp,test_adjust",
    list(product([1, 2, 3, 4, 5], [1, None], [False, True]))
)
def test_ewm_mean(self, test_window, test_minp, test_adjust):
    """Compare `vbt.ewm_mean` with pandas `ewm(span=...).mean()`."""
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.ewm_mean(test_window, minp=test_minp, adjust=test_adjust),
        df['a'].ewm(span=test_window, min_periods=test_minp, adjust=test_adjust).mean()
    )
    pd.testing.assert_frame_equal(
        df.vbt.ewm_mean(test_window, minp=test_minp, adjust=test_adjust),
        df.ewm(span=test_window, min_periods=test_minp, adjust=test_adjust).mean()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.ewm_mean(test_window),
        df.ewm(span=test_window).mean()
    )
365
366
@pytest.mark.parametrize(
    "test_window,test_minp,test_adjust",
    list(product([1, 2, 3, 4, 5], [1, None], [False, True]))
)
def test_ewm_std(self, test_window, test_minp, test_adjust):
    """Compare `vbt.ewm_std` with pandas `ewm(span=...).std()`."""
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.ewm_std(test_window, minp=test_minp, adjust=test_adjust),
        df['a'].ewm(span=test_window, min_periods=test_minp, adjust=test_adjust).std()
    )
    pd.testing.assert_frame_equal(
        df.vbt.ewm_std(test_window, minp=test_minp, adjust=test_adjust),
        df.ewm(span=test_window, min_periods=test_minp, adjust=test_adjust).std()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.ewm_std(test_window),
        df.ewm(span=test_window).std()
    )
385
386
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_min(self, test_minp):
    """Compare `vbt.expanding_min` with pandas `expanding(...).min()`."""
    pd.testing.assert_series_equal(
        df['a'].vbt.expanding_min(minp=test_minp),
        df['a'].expanding(min_periods=test_minp).min()
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_min(minp=test_minp),
        df.expanding(min_periods=test_minp).min()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.expanding_min(),
        df.expanding().min()
    )
403
404
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_max(self, test_minp):
    """Compare `vbt.expanding_max` with pandas `expanding(...).max()`."""
    pd.testing.assert_series_equal(
        df['a'].vbt.expanding_max(minp=test_minp),
        df['a'].expanding(min_periods=test_minp).max()
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_max(minp=test_minp),
        df.expanding(min_periods=test_minp).max()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.expanding_max(),
        df.expanding().max()
    )
421
422
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_mean(self, test_minp):
    """Compare `vbt.expanding_mean` with pandas `expanding(...).mean()`."""
    pd.testing.assert_series_equal(
        df['a'].vbt.expanding_mean(minp=test_minp),
        df['a'].expanding(min_periods=test_minp).mean()
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_mean(minp=test_minp),
        df.expanding(min_periods=test_minp).mean()
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.expanding_mean(),
        df.expanding().mean()
    )
439
440
@pytest.mark.parametrize(
    "test_minp,test_ddof",
    list(product([1, 3], [0, 1]))
)
def test_expanding_std(self, test_minp, test_ddof):
    """Compare `vbt.expanding_std` with pandas `expanding(...).std(ddof=...)`."""
    pd.testing.assert_series_equal(
        df['a'].vbt.expanding_std(minp=test_minp, ddof=test_ddof),
        df['a'].expanding(min_periods=test_minp).std(ddof=test_ddof)
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_std(minp=test_minp, ddof=test_ddof),
        df.expanding(min_periods=test_minp).std(ddof=test_ddof)
    )
    # Defaults on both sides
    pd.testing.assert_frame_equal(
        df.vbt.expanding_std(),
        df.expanding().std()
    )
457
458
def test_apply_along_axis(self):
    """Compare `vbt.apply_along_axis` with raw pandas `apply` on both axes."""
    pd.testing.assert_frame_equal(
        df.vbt.apply_along_axis(i_or_col_pow_nb, 2, axis=0),
        df.apply(pow_nb, args=(2,), axis=0, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.apply_along_axis(i_or_col_pow_nb, 2, axis=1),
        df.apply(pow_nb, args=(2,), axis=1, raw=True)
    )
467
468
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_apply(self, test_window, test_minp):
    """Compare `vbt.rolling_apply` with pandas rolling `apply`.

    Also pins the fixed output of the `on_matrix=True` variant with a
    window of 3.
    """
    # A `None` parameter means "require a full window"
    if test_minp is None:
        test_minp = test_window
    pd.testing.assert_series_equal(
        df['a'].vbt.rolling_apply(test_window, i_col_nanmean_nb, minp=test_minp),
        df['a'].rolling(test_window, min_periods=test_minp).apply(nanmean_nb, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_apply(test_window, i_col_nanmean_nb, minp=test_minp),
        df.rolling(test_window, min_periods=test_minp).apply(nanmean_nb, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_apply(test_window, i_col_nanmean_nb),
        df.rolling(test_window).apply(nanmean_nb, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_apply(3, i_nanmean_nb, on_matrix=True),
        pd.DataFrame(
            np.array([
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
                [2.75, 2.75, 2.75],
                [np.nan, np.nan, np.nan]
            ]),
            index=df.index,
            columns=df.columns
        )
    )
501
502
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_apply(self, test_minp):
    """Compare `vbt.expanding_apply` with pandas expanding `apply`.

    Also pins the fixed output of the `on_matrix=True` variant.
    """
    pd.testing.assert_series_equal(
        df['a'].vbt.expanding_apply(i_col_nanmean_nb, minp=test_minp),
        df['a'].expanding(min_periods=test_minp).apply(nanmean_nb, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_apply(i_col_nanmean_nb, minp=test_minp),
        df.expanding(min_periods=test_minp).apply(nanmean_nb, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_apply(i_col_nanmean_nb),
        df.expanding().apply(nanmean_nb, raw=True)
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_apply(i_nanmean_nb, on_matrix=True),
        pd.DataFrame(
            np.array([
                [np.nan, np.nan, np.nan],
                [2.0, 2.0, 2.0],
                [2.2857142857142856, 2.2857142857142856, 2.2857142857142856],
                [2.4, 2.4, 2.4],
                [2.1666666666666665, 2.1666666666666665, 2.1666666666666665]
            ]),
            index=df.index,
            columns=df.columns
        )
    )
533
534
def test_groupby_apply(self):
    """Compare `vbt.groupby_apply` with pandas `groupby` aggregation."""
    pd.testing.assert_series_equal(
        df['a'].vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), i_col_nanmean_nb),
        df['a'].groupby(np.asarray([1, 1, 2, 2, 3])).apply(lambda x: nanmean_nb(x.values))
    )
    pd.testing.assert_frame_equal(
        df.vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), i_col_nanmean_nb),
        df.groupby(np.asarray([1, 1, 2, 2, 3])).agg({
            'a': lambda x: nanmean_nb(x.values),
            'b': lambda x: nanmean_nb(x.values),
            'c': lambda x: nanmean_nb(x.values)
        }),  # any clean way to do column-wise grouping in pandas?
    )
547
548
def test_groupby_apply_on_matrix(self):
    """Pin the output of `vbt.groupby_apply` with `on_matrix=True`."""
    pd.testing.assert_frame_equal(
        df.vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), i_nanmean_nb, on_matrix=True),
        pd.DataFrame(
            np.array([
                [2., 2., 2.],
                [2.8, 2.8, 2.8],
                [1., 1., 1.]
            ]),
            index=pd.Index([1, 2, 3], dtype='int64'),
            columns=df.columns
        )
    )
561
562
@pytest.mark.parametrize(
    "test_freq",
    ['1h', '3d', '1w'],
)
def test_resample_apply(self, test_freq):
    """Compare `vbt.resample_apply` with pandas `resample(...).apply`.

    Also pins the fixed output of the `on_matrix=True` variant at '3d'.
    """
    pd.testing.assert_series_equal(
        df['a'].vbt.resample_apply(test_freq, i_col_nanmean_nb),
        df['a'].resample(test_freq).apply(lambda x: nanmean_nb(x.values))
    )
    pd.testing.assert_frame_equal(
        df.vbt.resample_apply(test_freq, i_col_nanmean_nb),
        df.resample(test_freq).apply(lambda x: nanmean_nb(x.values))
    )
    pd.testing.assert_frame_equal(
        df.vbt.resample_apply('3d', i_nanmean_nb, on_matrix=True),
        pd.DataFrame(
            np.array([
                [2.28571429, 2.28571429, 2.28571429],
                [2., 2., 2.]
            ]),
            index=pd.DatetimeIndex(['2018-01-01', '2018-01-04'], dtype='datetime64[ns]', freq='3D'),
            columns=df.columns
        )
    )
586
587
def test_applymap(self):
    """Compare `vbt.applymap` with pandas element-wise mapping."""
    @njit
    def mult_nb(i, col, x):
        return x * 2

    pd.testing.assert_series_equal(
        df['a'].vbt.applymap(mult_nb),
        df['a'].map(lambda x: x * 2)
    )
    pd.testing.assert_frame_equal(
        df.vbt.applymap(mult_nb),
        pandas_applymap(df, lambda x: x * 2)
    )
600
601
def test_filter(self):
    """Compare `vbt.filter` with mapping rejected elements to NaN in pandas."""
    @njit
    def greater_nb(i, col, x):
        return x > 2

    pd.testing.assert_series_equal(
        df['a'].vbt.filter(greater_nb),
        df['a'].map(lambda x: x if x > 2 else np.nan)
    )
    pd.testing.assert_frame_equal(
        df.vbt.filter(greater_nb),
        pandas_applymap(df, lambda x: x if x > 2 else np.nan)
    )
614
615
def test_apply_and_reduce(self):
    """Check `vbt.apply_and_reduce` against slicing and summing in pandas."""
    @njit
    def every_nth_nb(col, a, n):
        return a[::n]

    @njit
    def sum_nb(col, a, b):
        return np.nansum(a) + b

    assert df['a'].vbt.apply_and_reduce(every_nth_nb, sum_nb, apply_args=(2,), reduce_args=(3,)) == \
        df['a'].iloc[::2].sum() + 3
    pd.testing.assert_series_equal(
        df.vbt.apply_and_reduce(every_nth_nb, sum_nb, apply_args=(2,), reduce_args=(3,)),
        df.iloc[::2].sum().rename('apply_and_reduce') + 3
    )
    # Same reduction wrapped as timedelta
    pd.testing.assert_series_equal(
        df.vbt.apply_and_reduce(
            every_nth_nb, sum_nb, apply_args=(2,),
            reduce_args=(3,), wrap_kwargs=dict(to_timedelta=True)),
        (df.iloc[::2].sum().rename('apply_and_reduce') + 3) * day_dt
    )
636
637
def test_reduce(self):
    """Check `vbt.reduce` for scalar, index, array and index-array reducers.

    Each reducer kind is tried on a series, on the frame, and on the frame
    with column grouping.
    """
    # --- reducer returning a scalar ---
    @njit
    def sum_nb(col, a):
        return np.nansum(a)

    assert df['a'].vbt.reduce(sum_nb) == df['a'].sum()
    pd.testing.assert_series_equal(
        df.vbt.reduce(sum_nb),
        df.sum().rename('reduce')
    )
    pd.testing.assert_series_equal(
        df.vbt.reduce(sum_nb, wrap_kwargs=dict(to_timedelta=True)),
        df.sum().rename('reduce') * day_dt
    )
    pd.testing.assert_series_equal(
        df.vbt.reduce(sum_nb, group_by=group_by),
        pd.Series([20.0, 6.0], index=['g1', 'g2']).rename('reduce')
    )

    # --- reducer returning a position ---
    @njit
    def argmax_nb(col, a):
        a = a.copy()
        a[np.isnan(a)] = -np.inf
        return np.argmax(a)

    assert df['a'].vbt.reduce(argmax_nb, returns_idx=True) == df['a'].idxmax()
    pd.testing.assert_series_equal(
        df.vbt.reduce(argmax_nb, returns_idx=True),
        df.idxmax().rename('reduce')
    )
    pd.testing.assert_series_equal(
        df.vbt.reduce(argmax_nb, returns_idx=True, flatten=True, group_by=group_by),
        pd.Series(['2018-01-02', '2018-01-02'], dtype='datetime64[ns]', index=['g1', 'g2']).rename('reduce')
    )

    # --- reducer returning an array ---
    @njit
    def min_and_max_nb(col, a):
        out = np.empty(2)
        out[0] = np.nanmin(a)
        out[1] = np.nanmax(a)
        return out

    pd.testing.assert_series_equal(
        df['a'].vbt.reduce(
            min_and_max_nb, returns_array=True,
            wrap_kwargs=dict(name_or_index=['min', 'max'])),
        pd.Series([np.nanmin(df['a']), np.nanmax(df['a'])], index=['min', 'max'], name='a')
    )
    pd.testing.assert_frame_equal(
        df.vbt.reduce(
            min_and_max_nb, returns_array=True,
            wrap_kwargs=dict(name_or_index=['min', 'max'])),
        df.apply(lambda x: pd.Series(np.asarray([np.nanmin(x), np.nanmax(x)]), index=['min', 'max']), axis=0)
    )
    pd.testing.assert_frame_equal(
        df.vbt.reduce(
            min_and_max_nb, returns_array=True, group_by=group_by,
            wrap_kwargs=dict(name_or_index=['min', 'max'])),
        pd.DataFrame([[1.0, 1.0], [4.0, 2.0]], index=['min', 'max'], columns=['g1', 'g2'])
    )

    # --- reducer returning an array of positions ---
    @njit
    def argmin_and_argmax_nb(col, a):
        # nanargmin and nanargmax
        out = np.empty(2)
        _a = a.copy()
        _a[np.isnan(_a)] = np.inf
        out[0] = np.argmin(_a)
        _a = a.copy()
        _a[np.isnan(_a)] = -np.inf
        out[1] = np.argmax(_a)
        return out

    pd.testing.assert_series_equal(
        df['a'].vbt.reduce(
            argmin_and_argmax_nb, returns_idx=True, returns_array=True,
            wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax'])),
        pd.Series([df['a'].idxmin(), df['a'].idxmax()], index=['idxmin', 'idxmax'], name='a')
    )
    pd.testing.assert_frame_equal(
        df.vbt.reduce(
            argmin_and_argmax_nb, returns_idx=True, returns_array=True,
            wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax'])),
        df.apply(lambda x: pd.Series(np.asarray([x.idxmin(), x.idxmax()]), index=['idxmin', 'idxmax']), axis=0)
    )
    # Flattened groups: the expected idxmax for 'g1' differs between orders
    pd.testing.assert_frame_equal(
        df.vbt.reduce(argmin_and_argmax_nb, returns_idx=True, returns_array=True,
                      flatten=True, order='C', group_by=group_by,
                      wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax'])),
        pd.DataFrame([['2018-01-01', '2018-01-01'], ['2018-01-02', '2018-01-02']],
                     dtype='datetime64[ns]', index=['idxmin', 'idxmax'], columns=['g1', 'g2'])
    )
    pd.testing.assert_frame_equal(
        df.vbt.reduce(argmin_and_argmax_nb, returns_idx=True, returns_array=True,
                      flatten=True, order='F', group_by=group_by,
                      wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax'])),
        pd.DataFrame([['2018-01-01', '2018-01-01'], ['2018-01-04', '2018-01-02']],
                     dtype='datetime64[ns]', index=['idxmin', 'idxmax'], columns=['g1', 'g2'])
    )
736
737
def test_squeeze_grouped(self):
    """Pin the output of `vbt.squeeze_grouped` for a frame and a series."""
    pd.testing.assert_frame_equal(
        df.vbt.squeeze_grouped(i_col_nanmean_nb, group_by=group_by),
        pd.DataFrame([
            [1.0, 1.0],
            [3.0, 2.0],
            [3.0, np.nan],
            [3.0, 2.0],
            [1.0, 1.0]
        ], index=df.index, columns=['g1', 'g2'])
    )
    assert df['a'].vbt.squeeze_grouped(i_col_nanmean_nb, group_by=True) == 2.5
749
750
def test_flatten_grouped(self):
    """Pin the output of `vbt.flatten_grouped` for both orders and dtypes."""
    # order='C': expected index repeats each timestamp
    pd.testing.assert_frame_equal(
        df.vbt.flatten_grouped(group_by=group_by, order='C'),
        pd.DataFrame([
            [1.0, 1.0],
            [np.nan, np.nan],
            [2.0, 2.0],
            [4.0, np.nan],
            [3.0, np.nan],
            [3.0, np.nan],
            [4.0, 2.0],
            [2.0, np.nan],
            [np.nan, 1.0],
            [1.0, np.nan]
        ], index=np.repeat(df.index, 2), columns=['g1', 'g2'])
    )
    # order='F': expected index tiles the whole index
    pd.testing.assert_frame_equal(
        df.vbt.flatten_grouped(group_by=group_by, order='F'),
        pd.DataFrame([
            [1.0, 1.0],
            [2.0, 2.0],
            [3.0, np.nan],
            [4.0, 2.0],
            [np.nan, 1.0],
            [np.nan, np.nan],
            [4.0, np.nan],
            [3.0, np.nan],
            [2.0, np.nan],
            [1.0, np.nan]
        ], index=np.tile(df.index, 2), columns=['g1', 'g2'])
    )
    # Boolean frame collapsed into a single group
    pd.testing.assert_series_equal(
        pd.DataFrame([[False, True], [False, True]]).vbt.flatten_grouped(group_by=True, order='C'),
        pd.Series([False, True, False, True], name='group')
    )
    pd.testing.assert_series_equal(
        pd.DataFrame([[False, True], [False, True]]).vbt.flatten_grouped(group_by=True, order='F'),
        pd.Series([False, False, True, True], name='group')
    )
    # Boolean series with an explicit list of group labels
    pd.testing.assert_frame_equal(
        pd.Series([False, True, True, False]).vbt.flatten_grouped(group_by=[0, 0, 0, 1]),
        pd.DataFrame([[0., 0.], [1., np.nan], [1., np.nan]], columns=pd.Index([0, 1], dtype='int64'))
    )
793
794
@pytest.mark.parametrize(
    "test_name,test_func,test_func_nb",
    [
        ('min', lambda x, **kwargs: x.min(**kwargs), nb.nanmin_nb),
        ('max', lambda x, **kwargs: x.max(**kwargs), nb.nanmax_nb),
        ('mean', lambda x, **kwargs: x.mean(**kwargs), nb.nanmean_nb),
        ('median', lambda x, **kwargs: x.median(**kwargs), nb.nanmedian_nb),
        ('std', lambda x, **kwargs: x.std(**kwargs, ddof=0), nb.nanstd_nb),
        ('count', lambda x, **kwargs: x.count(**kwargs), nb.nancnt_nb),
        ('sum', lambda x, **kwargs: x.sum(**kwargs), nb.nansum_nb)
    ],
)
def test_funcs(self, test_name, test_func, test_func_nb):
    """Compare reducing accessor methods with pandas and the Numba functions.

    `test_func` is applied to both the vbt accessor and the pandas object,
    so the same lambda drives both sides of every comparison.
    """
    # numeric
    assert test_func(df['a'].vbt) == test_func(df['a'])
    pd.testing.assert_series_equal(
        test_func(df.vbt),
        test_func(df).rename(test_name)
    )
    pd.testing.assert_series_equal(
        test_func(df.vbt, group_by=group_by),
        pd.Series([
            test_func(df[['a', 'b']].stack()),
            test_func(df['c'])
        ], index=['g1', 'g2']).rename(test_name)
    )
    np.testing.assert_array_equal(test_func(df).values, test_func_nb(df.values))
    pd.testing.assert_series_equal(
        test_func(df.vbt, wrap_kwargs=dict(to_timedelta=True)),
        test_func(df).rename(test_name) * day_dt
    )
    # boolean
    bool_ts = df == df
    assert test_func(bool_ts['a'].vbt) == test_func(bool_ts['a'])
    pd.testing.assert_series_equal(
        test_func(bool_ts.vbt),
        test_func(bool_ts).rename(test_name)
    )
    pd.testing.assert_series_equal(
        test_func(bool_ts.vbt, wrap_kwargs=dict(to_timedelta=True)),
        test_func(bool_ts).rename(test_name) * day_dt
    )
836
837
@pytest.mark.parametrize(
    "test_name,test_func",
    [
        ('idxmin', lambda x, **kwargs: x.idxmin(**kwargs)),
        ('idxmax', lambda x, **kwargs: x.idxmax(**kwargs))
    ],
)
def test_arg_funcs(self, test_name, test_func):
    """Compare `idxmin`/`idxmax` on the accessor with pandas."""
    assert test_func(df['a'].vbt) == test_func(df['a'])
    pd.testing.assert_series_equal(
        test_func(df.vbt),
        test_func(df).rename(test_name)
    )
    pd.testing.assert_series_equal(
        test_func(df.vbt, group_by=group_by),
        pd.Series([
            # Only the first element of the stacked result's label is compared
            test_func(df[['a', 'b']].stack())[0],
            test_func(df['c'])
        ], index=['g1', 'g2'], dtype='datetime64[ns]').rename(test_name)
    )
857
858
def test_describe(self):
    """Compare `vbt.describe` with pandas `describe` for several percentiles."""
    pd.testing.assert_series_equal(
        df['a'].vbt.describe(),
        df['a'].describe()
    )
    pd.testing.assert_frame_equal(
        df.vbt.describe(percentiles=None),
        df.describe(percentiles=None)
    )
    pd.testing.assert_frame_equal(
        df.vbt.describe(percentiles=[]),
        df.describe(percentiles=[])
    )
    test_against = df.describe(percentiles=np.arange(0, 1, 0.1))
    pd.testing.assert_frame_equal(
        df.vbt.describe(percentiles=np.arange(0, 1, 0.1)),
        test_against
    )
    # Grouped: 'g1' is described over the stacked 'a' and 'b' columns
    pd.testing.assert_frame_equal(
        df.vbt.describe(percentiles=np.arange(0, 1, 0.1), group_by=group_by),
        pd.DataFrame({
            'g1': df[['a', 'b']].stack().describe(percentiles=np.arange(0, 1, 0.1)).values,
            'g2': df['c'].describe(percentiles=np.arange(0, 1, 0.1)).values
        }, index=test_against.index)
    )
883
884
def test_value_counts(self):
    """Pin the output of `vbt.value_counts` under its various options.

    Covers the plain call, a value mapping, column grouping, sorting in
    both directions, normalization, and dropping NaN.
    """
    pd.testing.assert_series_equal(
        df['a'].vbt.value_counts(),
        pd.Series(
            np.array([1, 1, 1, 1, 1]),
            index=pd.Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64'),
            name='a'
        )
    )
    # The mapping has no entry for NaN; its expected label is None
    mapping = {1.: 'one', 2.: 'two', 3.: 'three', 4.: 'four'}
    pd.testing.assert_series_equal(
        df['a'].vbt.value_counts(mapping=mapping),
        pd.Series(
            np.array([1, 1, 1, 1, 1]),
            index=pd.Index(['one', 'two', 'three', 'four', None], dtype='object'),
            name='a'
        )
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(),
        pd.DataFrame(
            np.array([
                [1, 1, 2],
                [1, 1, 2],
                [1, 1, 0],
                [1, 1, 0],
                [1, 1, 1]
            ]),
            index=pd.Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64'),
            columns=df.columns
        )
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(group_by=group_by),
        pd.DataFrame(
            np.array([
                [2, 2],
                [2, 2],
                [2, 0],
                [2, 0],
                [2, 1]
            ]),
            index=pd.Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64'),
            columns=pd.Index(['g1', 'g2'], dtype='object')
        )
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True),
        pd.DataFrame(
            np.array([
                [1, 1, 2],
                [1, 1, 2],
                [1, 1, 1],
                [1, 1, 0],
                [1, 1, 0]
            ]),
            index=pd.Index([1.0, 2.0, np.nan, 3.0, 4.0], dtype='float64'),
            columns=df.columns
        )
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True, ascending=True),
        pd.DataFrame(
            np.array([
                [1, 1, 0],
                [1, 1, 0],
                [1, 1, 1],
                [1, 1, 2],
                [1, 1, 2]
            ]),
            index=pd.Index([3.0, 4.0, np.nan, 1.0, 2.0], dtype='float64'),
            columns=df.columns
        )
    )
    # Expected fractions are multiples of 1/15 (15 cells in the frame)
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True, normalize=True),
        pd.DataFrame(
            np.array([
                [0.06666666666666667, 0.06666666666666667, 0.13333333333333333],
                [0.06666666666666667, 0.06666666666666667, 0.13333333333333333],
                [0.06666666666666667, 0.06666666666666667, 0.06666666666666667],
                [0.06666666666666667, 0.06666666666666667, 0.0],
                [0.06666666666666667, 0.06666666666666667, 0.0]
            ]),
            index=pd.Index([1.0, 2.0, np.nan, 3.0, 4.0], dtype='float64'),
            columns=df.columns
        )
    )
    # With NaN dropped the expected fractions are multiples of 1/12
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True, normalize=True, dropna=True),
        pd.DataFrame(
            np.array([
                [0.08333333333333333, 0.08333333333333333, 0.16666666666666666],
                [0.08333333333333333, 0.08333333333333333, 0.16666666666666666],
                [0.08333333333333333, 0.08333333333333333, 0.0],
                [0.08333333333333333, 0.08333333333333333, 0.0]
            ]),
            index=pd.Index([1.0, 2.0, 3.0, 4.0], dtype='float64'),
            columns=df.columns
        )
    )
985
986
def test_drawdown(self):
    """Compare `vbt.drawdown` with value / expanding maximum - 1."""
    pd.testing.assert_series_equal(
        df['a'].vbt.drawdown(),
        df['a'] / df['a'].expanding().max() - 1
    )
    pd.testing.assert_frame_equal(
        df.vbt.drawdown(),
        df / df.expanding().max() - 1
    )
995
996
def test_drawdowns(self):
    """Check the type and wrapper metadata of the `drawdowns` property."""
    assert type(df['a'].vbt.drawdowns) is vbt.Drawdowns
    assert df['a'].vbt.drawdowns.wrapper.freq == df['a'].vbt.wrapper.freq
    assert df['a'].vbt.drawdowns.wrapper.ndim == df['a'].ndim
    assert df.vbt.drawdowns.wrapper.ndim == df.ndim
1001
1002
def test_to_mapped(self):
    """Pin values, column indices and row indices of `vbt.to_mapped`.

    Checked with the default arguments (expected output has no NaN) and
    with `dropna=False` (expected output keeps NaN).
    """
    np.testing.assert_array_equal(
        df.vbt.to_mapped().values,
        np.array([1., 2., 3., 4., 4., 3., 2., 1., 1., 2., 2., 1.])
    )
    np.testing.assert_array_equal(
        df.vbt.to_mapped().col_arr,
        np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
    )
    np.testing.assert_array_equal(
        df.vbt.to_mapped().idx_arr,
        np.array([0, 1, 2, 3, 1, 2, 3, 4, 0, 1, 3, 4])
    )
    np.testing.assert_array_equal(
        df.vbt.to_mapped(dropna=False).values,
        np.array([1., 2., 3., 4., np.nan, np.nan, 4., 3., 2., 1., 1., 2., np.nan, 2., 1.])
    )
    np.testing.assert_array_equal(
        df.vbt.to_mapped(dropna=False).col_arr,
        np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2])
    )
    np.testing.assert_array_equal(
        df.vbt.to_mapped(dropna=False).idx_arr,
        np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
    )
1027
1028
def test_zscore(self):
    """Z-score must equal mean-centered values divided by the ``ddof=0`` std."""
    # Series accessor
    sr = df['a']
    expected_sr = (sr - sr.mean()) / sr.std(ddof=0)
    pd.testing.assert_series_equal(sr.vbt.zscore(), expected_sr)

    # Frame accessor (statistics are taken per column)
    expected_df = (df - df.mean()) / df.std(ddof=0)
    pd.testing.assert_frame_equal(df.vbt.zscore(), expected_df)
1037
1038
def test_split(self):
    """Split a series and a frame with ``TimeSeriesSplit`` and verify both sets.

    For each input, the train and test frames as well as the per-split
    datetime indexes returned by ``vbt.split`` are compared with fixed targets.
    """

    def dt_index(dates, name):
        # Expected datetime index of a single split
        return pd.DatetimeIndex(dates, dtype='datetime64[ns]', name=name, freq=None)

    def assert_indexes(actual, expected):
        # Positional lookup: a too-short `actual` still fails with an error
        for i, expected_index in enumerate(expected):
            pd.testing.assert_index_equal(actual[i], expected_index)

    # The expected indexes are the same for the series and the frame
    expected_train_indexes = [
        dt_index(['2018-01-01', '2018-01-02', '2018-01-03'], 'split_0'),
        dt_index(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04'], 'split_1')
    ]
    expected_test_indexes = [
        dt_index(['2018-01-04'], 'split_0'),
        dt_index(['2018-01-05'], 'split_1')
    ]
    splitter = TimeSeriesSplit(n_splits=2)

    # Series input: one column per split
    (train_df, train_indexes), (test_df, test_indexes) = df['a'].vbt.split(splitter)
    series_columns = pd.Index([0, 1], dtype='int64', name='split_idx')
    expected_train_df = pd.DataFrame(
        np.array([
            [1.0, 1.0],
            [2.0, 2.0],
            [3.0, 3.0],
            [np.nan, 4.0]
        ]),
        index=pd.RangeIndex(start=0, stop=4, step=1),
        columns=series_columns
    )
    pd.testing.assert_frame_equal(train_df, expected_train_df)
    assert_indexes(train_indexes, expected_train_indexes)
    expected_test_df = pd.DataFrame(
        np.array([
            [4.0, np.nan]
        ]),
        index=pd.RangeIndex(start=0, stop=1, step=1),
        columns=series_columns
    )
    pd.testing.assert_frame_equal(test_df, expected_test_df)
    assert_indexes(test_indexes, expected_test_indexes)

    # Frame input: one column per (split, original column) pair
    (train_df, train_indexes), (test_df, test_indexes) = df.vbt.split(splitter)
    frame_columns = pd.MultiIndex.from_tuples([
        (0, 'a'),
        (0, 'b'),
        (0, 'c'),
        (1, 'a'),
        (1, 'b'),
        (1, 'c')
    ], names=['split_idx', None])
    expected_train_df = pd.DataFrame(
        np.array([
            [1.0, np.nan, 1.0, 1.0, np.nan, 1.0],
            [2.0, 4.0, 2.0, 2.0, 4.0, 2.0],
            [3.0, 3.0, np.nan, 3.0, 3.0, np.nan],
            [np.nan, np.nan, np.nan, 4.0, 2.0, 2.0]
        ]),
        index=pd.RangeIndex(start=0, stop=4, step=1),
        columns=frame_columns
    )
    pd.testing.assert_frame_equal(train_df, expected_train_df)
    assert_indexes(train_indexes, expected_train_indexes)
    expected_test_df = pd.DataFrame(
        np.array([
            [4.0, 2.0, 2.0, np.nan, 1.0, 1.0]
        ]),
        index=pd.RangeIndex(start=0, stop=1, step=1),
        columns=frame_columns
    )
    pd.testing.assert_frame_equal(test_df, expected_test_df)
    assert_indexes(test_indexes, expected_test_indexes)
1142
1143
def test_range_split(self):
    """Check ``range_split`` for series and frames across its range arguments.

    Covers ``n``, ``range_len``, their combination, explicit integer and
    datetime ``start_idxs``/``end_idxs``, and argument sets that must raise.
    Each scenario calls ``range_split`` exactly once and checks both the
    returned frame (element 0) and the per-split indexes (element 1).
    """
    nan = np.nan

    def dt_index(dates, name):
        # Expected datetime index of a single split
        return pd.DatetimeIndex(dates, dtype='datetime64[ns]', name=name, freq=None)

    def series_columns(n_splits):
        # Columns produced for a series input: one per split
        return pd.Index(list(range(n_splits)), dtype='int64', name='split_idx')

    def frame_columns():
        # Columns produced for the frame input with two splits
        return pd.MultiIndex.from_arrays([
            pd.Index([0, 0, 0, 1, 1, 1], dtype='int64', name='split_idx'),
            pd.Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object')
        ])

    def assert_split(result, expected_values, expected_columns, expected_indexes):
        # `result` is computed once by the caller rather than once per index
        pd.testing.assert_frame_equal(
            result[0],
            pd.DataFrame(
                np.array(expected_values),
                index=pd.RangeIndex(start=0, stop=len(expected_values), step=1),
                columns=expected_columns
            )
        )
        for i, expected_index in enumerate(expected_indexes):
            pd.testing.assert_index_equal(result[1][i], expected_index)

    # Series input
    assert_split(
        df['a'].vbt.range_split(n=2),
        [
            [1., 4.],
            [2., nan]
        ],
        series_columns(2),
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-04', '2018-01-05'], 'split_1')
        ]
    )
    assert_split(
        df['a'].vbt.range_split(range_len=2),
        [
            [1., 2., 3., 4.],
            [2., 3., 4., nan]
        ],
        series_columns(4),
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-02', '2018-01-03'], 'split_1'),
            dt_index(['2018-01-03', '2018-01-04'], 'split_2'),
            dt_index(['2018-01-04', '2018-01-05'], 'split_3')
        ]
    )
    assert_split(
        df['a'].vbt.range_split(range_len=2, n=3),
        [
            [1., 3., 4.],
            [2., 4., nan]
        ],
        series_columns(3),
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-03', '2018-01-04'], 'split_1'),
            dt_index(['2018-01-04', '2018-01-05'], 'split_2')
        ]
    )
    assert_split(
        df['a'].vbt.range_split(range_len=3, n=2),
        [
            [1., 3.],
            [2., 4.],
            [3., nan]
        ],
        series_columns(2),
        [
            dt_index(['2018-01-01', '2018-01-02', '2018-01-03'], 'split_0'),
            dt_index(['2018-01-03', '2018-01-04', '2018-01-05'], 'split_1')
        ]
    )

    # Frame input
    assert_split(
        df.vbt.range_split(n=2),
        [
            [1.0, nan, 1.0, 4.0, 2.0, 2.0],
            [2.0, 4.0, 2.0, nan, 1.0, 1.0]
        ],
        frame_columns(),
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-04', '2018-01-05'], 'split_1')
        ]
    )
    # Integer and datetime bounds must produce the same splits
    for bounds in (
        dict(start_idxs=[0, 1], end_idxs=[2, 3]),
        dict(start_idxs=df.index[[0, 1]], end_idxs=df.index[[2, 3]])
    ):
        assert_split(
            df.vbt.range_split(**bounds),
            [
                [1.0, nan, 1.0, 2.0, 4.0, 2.0],
                [2.0, 4.0, 2.0, 3.0, 3.0, nan],
                [3.0, 3.0, nan, 4.0, 2.0, 2.0]
            ],
            frame_columns(),
            [
                dt_index(['2018-01-01', '2018-01-02', '2018-01-03'], 'split_0'),
                dt_index(['2018-01-02', '2018-01-03', '2018-01-04'], 'split_1')
            ]
        )
    # A single start paired with two ends: both ranges begin at the first row
    assert_split(
        df.vbt.range_split(start_idxs=df.index[[0]], end_idxs=df.index[[2, 3]]),
        [
            [1.0, nan, 1.0, 1.0, nan, 1.0],
            [2.0, 4.0, 2.0, 2.0, 4.0, 2.0],
            [3.0, 3.0, nan, 3.0, 3.0, nan],
            [nan, nan, nan, 4.0, 2.0, 2.0]
        ],
        frame_columns(),
        [
            dt_index(['2018-01-01', '2018-01-02', '2018-01-03'], 'split_0'),
            dt_index(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04'], 'split_1')
        ]
    )

    # Argument sets that are expected to raise
    for bad_kwargs in (
        dict(),
        dict(start_idxs=[0, 1]),
        dict(end_idxs=[2, 4]),
        dict(min_len=10),
        dict(n=10)
    ):
        with pytest.raises(Exception):
            df.vbt.range_split(**bad_kwargs)
1342
1343
def test_rolling_split(self):
    """Check ``rolling_split`` frames and per-split indexes for a series.

    Covers three-set splits with ``left_to_right`` given as ``False``,
    ``True`` and a per-split list, single-set splits with an absolute and a
    fractional ``window_len``, and argument sets that must raise.
    """
    nan = np.nan

    def dt_index(dates, name):
        # Expected datetime index of a single split
        return pd.DatetimeIndex(dates, dtype='datetime64[ns]', name=name, freq=None)

    def assert_set(split_df, split_indexes, expected_values, expected_indexes):
        # Every scenario here yields two splits, hence the fixed columns
        pd.testing.assert_frame_equal(
            split_df,
            pd.DataFrame(
                np.array(expected_values),
                index=pd.RangeIndex(start=0, stop=len(expected_values), step=1),
                columns=pd.Index([0, 1], dtype='int64', name='split_idx')
            )
        )
        for i, expected_index in enumerate(expected_indexes):
            pd.testing.assert_index_equal(split_indexes[i], expected_index)

    # Three sets, set lengths assigned with left_to_right=False
    (df1, indexes1), (df2, indexes2), (df3, indexes3) = df['a'].vbt.rolling_split(
        window_len=4, set_lens=(1, 1), left_to_right=False)
    assert_set(
        df1, indexes1,
        [
            [1.0, 2.0],
            [2.0, 3.0]
        ],
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-02', '2018-01-03'], 'split_1')
        ]
    )
    assert_set(
        df2, indexes2,
        [
            [3.0, 4.0]
        ],
        [
            dt_index(['2018-01-03'], 'split_0'),
            dt_index(['2018-01-04'], 'split_1')
        ]
    )
    assert_set(
        df3, indexes3,
        [
            [4.0, nan]
        ],
        [
            dt_index(['2018-01-04'], 'split_0'),
            dt_index(['2018-01-05'], 'split_1')
        ]
    )

    # Three sets, set lengths assigned with left_to_right=True
    (df1, indexes1), (df2, indexes2), (df3, indexes3) = df['a'].vbt.rolling_split(
        window_len=4, set_lens=(1, 1), left_to_right=True)
    assert_set(
        df1, indexes1,
        [
            [1.0, 2.0]
        ],
        [
            dt_index(['2018-01-01'], 'split_0'),
            dt_index(['2018-01-02'], 'split_1')
        ]
    )
    assert_set(
        df2, indexes2,
        [
            [2.0, 3.0]
        ],
        [
            dt_index(['2018-01-02'], 'split_0'),
            dt_index(['2018-01-03'], 'split_1')
        ]
    )
    assert_set(
        df3, indexes3,
        [
            [3.0, 4.0],
            [4.0, nan]
        ],
        [
            dt_index(['2018-01-03', '2018-01-04'], 'split_0'),
            dt_index(['2018-01-04', '2018-01-05'], 'split_1')
        ]
    )

    # Fractional set lengths and a per-split left_to_right list
    (df1, indexes1), (df2, indexes2), (df3, indexes3) = df['a'].vbt.rolling_split(
        window_len=4, set_lens=(0.25, 0.25), left_to_right=[False, True])
    assert_set(
        df1, indexes1,
        [
            [1.0, 2.0],
            [2.0, nan]
        ],
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-02'], 'split_1')
        ]
    )
    assert_set(
        df2, indexes2,
        [
            [3.0, 3.0]
        ],
        [
            dt_index(['2018-01-03'], 'split_0'),
            dt_index(['2018-01-03'], 'split_1')
        ]
    )
    assert_set(
        df3, indexes3,
        [
            [4.0, 4.0],
            [nan, nan]
        ],
        [
            dt_index(['2018-01-04'], 'split_0'),
            dt_index(['2018-01-04', '2018-01-05'], 'split_1')
        ]
    )

    # Single set: absolute and fractional window lengths share the same target.
    # The indexes of the absolute-length case were previously built as a target
    # but never compared; both cases are now fully checked.
    for window_len in (2, 0.4):
        df1, indexes1 = df['a'].vbt.rolling_split(window_len=window_len, n=2)
        assert_set(
            df1, indexes1,
            [
                [1.0, 4.0],
                [2.0, nan]
            ],
            [
                dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
                dt_index(['2018-01-04', '2018-01-05'], 'split_1')
            ]
        )

    # Argument sets that are expected to raise
    for bad_kwargs in (
        dict(),
        dict(window_len=3, set_lens=(3, 1)),
        dict(window_len=1, set_lens=(1, 1)),
        dict(n=2, min_len=10),
        dict(n=10)
    ):
        with pytest.raises(Exception):
            df.vbt.rolling_split(**bad_kwargs)
1572
1573
def test_expanding_split(self):
    """Check ``expanding_split`` frames and per-split indexes for a series.

    Covers a three-set split, a single-set split, and argument sets that
    must raise.
    """
    nan = np.nan

    def dt_index(dates, name):
        # Expected datetime index of a single split
        return pd.DatetimeIndex(dates, dtype='datetime64[ns]', name=name, freq=None)

    def assert_set(split_df, split_indexes, expected_values, expected_indexes):
        # Every scenario here yields two splits, hence the fixed columns
        pd.testing.assert_frame_equal(
            split_df,
            pd.DataFrame(
                np.array(expected_values),
                index=pd.RangeIndex(start=0, stop=len(expected_values), step=1),
                columns=pd.Index([0, 1], dtype='int64', name='split_idx')
            )
        )
        for i, expected_index in enumerate(expected_indexes):
            pd.testing.assert_index_equal(split_indexes[i], expected_index)

    # Three sets per split
    (df1, indexes1), (df2, indexes2), (df3, indexes3) = df['a'].vbt.expanding_split(
        min_len=4, set_lens=(1, 1), left_to_right=False)
    assert_set(
        df1, indexes1,
        [
            [1.0, 1.0],
            [2.0, 2.0],
            [nan, 3.0]
        ],
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-01', '2018-01-02', '2018-01-03'], 'split_1')
        ]
    )
    assert_set(
        df2, indexes2,
        [
            [3.0, 4.0]
        ],
        [
            dt_index(['2018-01-03'], 'split_0'),
            dt_index(['2018-01-04'], 'split_1')
        ]
    )
    assert_set(
        df3, indexes3,
        [
            [4.0, nan]
        ],
        [
            dt_index(['2018-01-04'], 'split_0'),
            dt_index(['2018-01-05'], 'split_1')
        ]
    )

    # Single set per split
    df1, indexes1 = df['a'].vbt.expanding_split(n=2, min_len=2)
    assert_set(
        df1, indexes1,
        [
            [1.0, 1.0],
            [2.0, 2.0],
            [nan, 3.0],
            [nan, 4.0],
            [nan, nan]
        ],
        [
            dt_index(['2018-01-01', '2018-01-02'], 'split_0'),
            dt_index(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05'], 'split_1')
        ]
    )

    # Argument sets that are expected to raise
    for bad_kwargs in (dict(n=2, min_len=10), dict(n=10)):
        with pytest.raises(Exception):
            df.vbt.expanding_split(**bad_kwargs)
1667
1668
def test_crossed_above(self):
    """Check ``crossed_above`` with and without ``wait`` on NaN-containing series."""
    sr1 = pd.Series([np.nan, 3, 2, 1, 2, 3, 4])
    sr2 = pd.Series([1, 2, 3, 4, 3, 2, 1])
    sr3 = pd.Series([1, 2, 3, np.nan, 5, 1, 5])
    sr4 = pd.Series([3, 2, 1, 1, 1, 5, 1])

    # (left series, right series, keyword arguments, expected crossover mask)
    cases = (
        (sr1, sr2, dict(), [False, False, False, False, False, True, False]),
        (sr1, sr2, dict(wait=1), [False, False, False, False, False, False, True]),
        (sr3, sr4, dict(), [False, False, True, False, False, False, True]),
        (sr3, sr4, dict(wait=1), [False, False, False, False, False, False, False]),
    )
    for left, right, kwargs, expected in cases:
        pd.testing.assert_series_equal(
            left.vbt.crossed_above(right, **kwargs),
            pd.Series(expected)
        )
1689
1690
def test_crossed_below(self):
    """Check ``crossed_below`` with and without ``wait`` on NaN-containing series.

    The ``sr3``/``sr4`` assertions previously called ``crossed_above`` (a
    duplicate of ``test_crossed_above``), which left ``crossed_below``
    untested on a series with a NaN in the middle.
    """
    sr1 = pd.Series([np.nan, 3, 2, 1, 2, 3, 4])
    sr2 = pd.Series([1, 2, 3, 4, 3, 2, 1])
    pd.testing.assert_series_equal(
        sr1.vbt.crossed_below(sr2),
        pd.Series([False, False, True, False, False, False, False])
    )
    pd.testing.assert_series_equal(
        sr1.vbt.crossed_below(sr2, wait=1),
        pd.Series([False, False, False, True, False, False, False])
    )

    sr3 = pd.Series([1, 2, 3, np.nan, 5, 1, 5])
    sr4 = pd.Series([3, 2, 1, 1, 1, 5, 1])
    # sr3 is above sr4 at position 4 and below it at position 5, so the only
    # downward crossover is at position 5
    pd.testing.assert_series_equal(
        sr3.vbt.crossed_below(sr4),
        pd.Series([False, False, False, False, False, True, False])
    )
    # sr3 is back above sr4 at position 6, so the crossover at position 5
    # is not confirmed after waiting one step
    pd.testing.assert_series_equal(
        sr3.vbt.crossed_below(sr4, wait=1),
        pd.Series([False, False, False, False, False, False, False])
    )
1711
1712
def test_stats(self):
    """Compare ``stats`` output with fixed targets for columns, groups and no aggregation."""
    start = pd.Timestamp('2018-01-01 00:00:00')
    end = pd.Timestamp('2018-01-05 00:00:00')
    period = pd.Timedelta('5 days 00:00:00')
    stats_index = pd.Index([
        'Start', 'End', 'Period', 'Count', 'Mean', 'Std', 'Min', 'Median', 'Max', 'Min Index', 'Max Index'
    ], dtype='object')

    # Aggregated over all columns: the target omits the two index metrics
    expected_agg = pd.Series(
        [
            start, end, period,
            4.0, 2.1666666666666665, 1.0531130555537456, 1.0, 2.1666666666666665, 3.3333333333333335
        ],
        index=stats_index[:-2],
        name='agg_func_mean'
    )
    pd.testing.assert_series_equal(df.vbt.stats(), expected_agg)

    # Single column
    expected_a = pd.Series(
        [
            start, end, period,
            4, 2.5, 1.2909944487358056, 1.0, 2.5, 4.0,
            pd.Timestamp('2018-01-01 00:00:00'),
            pd.Timestamp('2018-01-04 00:00:00')
        ],
        index=stats_index,
        name='a'
    )
    pd.testing.assert_series_equal(df.vbt.stats(column='a'), expected_a)

    # Single group
    expected_g1 = pd.Series(
        [
            start, end, period,
            8, 2.5, 1.1952286093343936, 1.0, 2.5, 4.0,
            pd.Timestamp('2018-01-01 00:00:00'),
            pd.Timestamp('2018-01-02 00:00:00')
        ],
        index=stats_index,
        name='g1'
    )
    pd.testing.assert_series_equal(df.vbt.stats(column='g1', group_by=group_by), expected_g1)

    # Different ways of selecting the same column or group must agree
    equivalent_pairs = [
        (df['c'].vbt.stats(), df.vbt.stats(column='c')),
        (df['c'].vbt.stats(), df.vbt.stats(column='c', group_by=False)),
        (df.vbt(group_by=group_by)['g2'].stats(), df.vbt(group_by=group_by).stats(column='g2')),
        (df.vbt(group_by=group_by)['g2'].stats(), df.vbt.stats(column='g2', group_by=group_by)),
    ]
    for left, right in equivalent_pairs:
        pd.testing.assert_series_equal(left, right)

    # Without aggregation: one row per column, one column per metric
    stats_df = df.vbt.stats(agg_func=None)
    assert stats_df.shape == (3, 11)
    pd.testing.assert_index_equal(stats_df.index, df.vbt.wrapper.columns)
    pd.testing.assert_index_equal(stats_df.columns, stats_index)
1776
1777
def test_stats_mapping(self):
    """Compare ``stats`` output with fixed targets when a value mapping is set."""
    # Map every distinct value of the frame to a 'test_'-prefixed label
    mapping = {x: 'test_' + str(x) for x in pd.unique(df.values.flatten())}
    start = pd.Timestamp('2018-01-01 00:00:00')
    end = pd.Timestamp('2018-01-05 00:00:00')
    period = pd.Timedelta('5 days 00:00:00')
    stats_index = pd.Index([
        'Start', 'End', 'Period', 'Value Counts: test_1.0',
        'Value Counts: test_2.0', 'Value Counts: test_3.0',
        'Value Counts: test_4.0', 'Value Counts: test_nan'
    ], dtype='object')

    # Aggregated over all columns
    expected_agg = pd.Series(
        [
            start, end, period,
            1.3333333333333333, 1.3333333333333333, 0.6666666666666666, 0.6666666666666666, 1.0
        ],
        index=stats_index,
        name='agg_func_mean'
    )
    pd.testing.assert_series_equal(df.vbt(mapping=mapping).stats(), expected_agg)

    # Single column
    expected_a = pd.Series(
        [
            start, end, period,
            1, 1, 1, 1, 1
        ],
        index=stats_index,
        name='a'
    )
    pd.testing.assert_series_equal(df.vbt(mapping=mapping).stats(column='a'), expected_a)

    # Single group
    expected_g1 = pd.Series(
        [
            start, end, period,
            2, 2, 2, 2, 2
        ],
        index=stats_index,
        name='g1'
    )
    pd.testing.assert_series_equal(
        df.vbt(mapping=mapping).stats(column='g1', group_by=group_by),
        expected_g1
    )

    # Different ways of passing the mapping or selecting a column/group must agree
    all_keys = dict(incl_all_keys=True)
    equivalent_pairs = [
        (
            df.vbt(mapping=mapping).stats(),
            df.vbt.stats(settings=dict(mapping=mapping))
        ),
        (
            df['c'].vbt(mapping=mapping).stats(settings=all_keys),
            df.vbt(mapping=mapping).stats(column='c')
        ),
        (
            df['c'].vbt(mapping=mapping).stats(settings=all_keys),
            df.vbt(mapping=mapping).stats(column='c', group_by=False)
        ),
        (
            df.vbt(mapping=mapping, group_by=group_by)['g2'].stats(settings=all_keys),
            df.vbt(mapping=mapping, group_by=group_by).stats(column='g2')
        ),
        (
            df.vbt(mapping=mapping, group_by=group_by)['g2'].stats(settings=all_keys),
            df.vbt(mapping=mapping).stats(column='g2', group_by=group_by)
        ),
    ]
    for left, right in equivalent_pairs:
        pd.testing.assert_series_equal(left, right)

    # Without aggregation: one row per column, one column per metric
    stats_df = df.vbt(mapping=mapping).stats(agg_func=None)
    assert stats_df.shape == (3, 8)
    pd.testing.assert_index_equal(stats_df.index, df.vbt.wrapper.columns)
    pd.testing.assert_index_equal(stats_df.columns, stats_index)
1844
1845