Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
polakowo
GitHub Repository: polakowo/vectorbt
Path: blob/master/tests/test_generic.py
1071 views
1
from datetime import datetime
2
from itertools import product
3
4
import numpy as np
5
import pandas as pd
6
import pytest
7
from numba import njit
8
from sklearn.model_selection import TimeSeriesSplit
9
10
import vectorbt as vbt
11
from vectorbt.generic import nb
12
13
# Seed passed to the shuffle calls so their output is reproducible.
seed = 42

# Unit-less timedelta whose integer value is the number of nanoseconds in one day;
# used to build the expected values when results are wrapped with `to_timedelta=True`.
day_dt = np.timedelta64(86400000000000)

# Shared sample frame: three float columns with one NaN each, indexed by
# five consecutive calendar days starting 2018-01-01.
df = pd.DataFrame(
    {
        'a': [1, 2, 3, 4, np.nan],
        'b': [np.nan, 4, 3, 2, 1],
        'c': [1, 2, np.nan, 2, 1],
    },
    index=pd.DatetimeIndex([datetime(2018, 1, day) for day in range(1, 6)]),
)

# Column-to-group mapping: 'a' and 'b' form group 'g1', 'c' forms group 'g2'.
group_by = np.array(['g1', 'g1', 'g2'])
29
30
31
@njit
def i_or_col_pow_nb(i_or_col, x, pow):
    """Raise `x` to `pow`; the leading row/column index argument is accepted but unused."""
    powered = np.power(x, pow)
    return powered
34
35
36
@njit
def pow_nb(x, pow):
    """Raise `x` to `pow` with `np.power`."""
    powered = np.power(x, pow)
    return powered
39
40
41
@njit
def nanmean_nb(x):
    """Return the mean of `x`, ignoring NaN values."""
    mean = np.nanmean(x)
    return mean
44
45
46
@njit
def i_col_nanmean_nb(i, col, x):
    """Return the NaN-ignoring mean of `x`; `i` and `col` are accepted but unused."""
    mean = np.nanmean(x)
    return mean
49
50
51
@njit
def i_nanmean_nb(i, x):
    """Return the NaN-ignoring mean of `x`; `i` is accepted but unused."""
    mean = np.nanmean(x)
    return mean
54
55
56
@njit
def col_nanmean_nb(col, x):
    """Return the NaN-ignoring mean of `x`; `col` is accepted but unused."""
    mean = np.nanmean(x)
    return mean
59
60
61
# ############# Global ############# #
62
63
def setup_module():
    """Adjust global vectorbt settings before the tests of this module run.

    Turns on the numba function-suffix check, disables caching and empties
    the caching whitelist and blacklist.
    """
    vbt.settings.numba['check_func_suffix'] = True

    vbt.settings.caching.enabled = False
    vbt.settings.caching.whitelist = []
    vbt.settings.caching.blacklist = []
68
69
70
def teardown_module():
    """Reset the vectorbt settings once the tests of this module have finished."""
    vbt.settings.reset()
72
73
74
# ############# accessors.py ############# #
75
76
77
class TestAccessors:
78
def test_indexing(self):
    """Indexing the accessor by column must match accessing the selected column."""
    indexed_accessor_min = df.vbt['a'].min()
    column_accessor_min = df['a'].vbt.min()
    assert indexed_accessor_min == column_accessor_min
80
81
def test_set_by_mask(self):
    """Check the scalar and per-element `set_by_mask*` kernels on 1-dim and 2-dim input."""

    def fresh_values():
        # A new array per call so no kernel ever sees data touched by another call.
        return np.array([1, 2, 3, 1, 2, 3])

    def fresh_mask():
        return np.array([True, False, False, True, False, False])

    def shaped(arr, two_dim):
        # Turn the vector into a single-column matrix for the 2-dim kernels.
        return arr[:, None] if two_dim else arr

    # Scalar replacement value: integer first, then float.
    for func_nb, two_dim in ((nb.set_by_mask_1d_nb, False), (nb.set_by_mask_nb, True)):
        for fill in (0, 0.):
            result = func_nb(
                shaped(fresh_values(), two_dim),
                shaped(fresh_mask(), two_dim),
                fill
            )
            expected = shaped(np.array([fill, 2, 3, fill, 2, 3]), two_dim)
            np.testing.assert_array_equal(result, expected)

    # Array of replacement values: integer dtype first, then float dtype.
    for func_nb, two_dim in ((nb.set_by_mask_mult_1d_nb, False), (nb.set_by_mask_mult_nb, True)):
        for dtype in (int, float):
            replacements = np.array([0, -1, -1, 0, -1, -1], dtype=dtype)
            result = func_nb(
                shaped(fresh_values(), two_dim),
                shaped(fresh_mask(), two_dim),
                shaped(replacements, two_dim)
            )
            expected = shaped(np.array([0, 2, 3, 0, 2, 3], dtype=dtype), two_dim)
            np.testing.assert_array_equal(result, expected)
146
147
def test_shuffle(self):
    """Seeded shuffling must reproduce the recorded permutation and match the numba kernel."""
    expected_sr = pd.Series(
        np.array([2.0, np.nan, 3.0, 1.0, 4.0]),
        index=df['a'].index,
        name=df['a'].name
    )
    pd.testing.assert_series_equal(df['a'].vbt.shuffle(seed=seed), expected_sr)

    accessor_values = df['a'].vbt.shuffle(seed=seed).values
    kernel_values = nb.shuffle_1d_nb(df['a'].values, seed=seed)
    np.testing.assert_array_equal(accessor_values, kernel_values)

    expected_df = pd.DataFrame(
        np.array([
            [2., 2., 2.],
            [np.nan, 4., 1.],
            [3., 3., 2.],
            [1., np.nan, 1.],
            [4., 1., np.nan]
        ]),
        index=df.index,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(df.vbt.shuffle(seed=seed), expected_df)
174
175
@pytest.mark.parametrize(
    "test_value",
    [-1, 0., np.nan],
)
def test_fillna(self, test_value):
    """`vbt.fillna` must agree with pandas `fillna` and leave NaN-free input unchanged."""
    actual_sr = df['a'].vbt.fillna(test_value)
    pd.testing.assert_series_equal(actual_sr, df['a'].fillna(test_value))

    actual_df = df.vbt.fillna(test_value)
    pd.testing.assert_frame_equal(actual_df, df.fillna(test_value))

    # Integer and boolean series without NaN are expected back as they were.
    filled_ints = pd.Series([1, 2, 3]).vbt.fillna(-1)
    pd.testing.assert_series_equal(filled_ints, pd.Series([1, 2, 3]))

    filled_bools = pd.Series([False, True, False]).vbt.fillna(False)
    pd.testing.assert_series_equal(filled_bools, pd.Series([False, True, False]))
188
189
@pytest.mark.parametrize(
    "test_n",
    [1, 2, 3, 4, 5],
)
def test_bshift(self, test_n):
    """Backward shift must equal pandas `shift(-n)` and honour `fill_value`."""
    actual_sr = df['a'].vbt.bshift(test_n)
    pd.testing.assert_series_equal(actual_sr, df['a'].shift(-test_n))

    accessor_values = df['a'].vbt.bshift(test_n).values
    kernel_values = nb.bshift_1d_nb(df['a'].values, test_n)
    np.testing.assert_array_equal(accessor_values, kernel_values)

    actual_df = df.vbt.bshift(test_n)
    pd.testing.assert_frame_equal(actual_df, df.shift(-test_n))

    # A custom fill value keeps the integer / boolean dtype of the input.
    shifted_ints = pd.Series([1, 2, 3]).vbt.bshift(1, fill_value=-1)
    pd.testing.assert_series_equal(shifted_ints, pd.Series([2, 3, -1]))

    shifted_bools = pd.Series([True, True, True]).vbt.bshift(1, fill_value=False)
    pd.testing.assert_series_equal(shifted_bools, pd.Series([True, True, False]))
208
209
@pytest.mark.parametrize(
    "test_n",
    [1, 2, 3, 4, 5],
)
def test_fshift(self, test_n):
    """Forward shift must equal pandas `shift(n)` and honour `fill_value`."""
    actual_sr = df['a'].vbt.fshift(test_n)
    pd.testing.assert_series_equal(actual_sr, df['a'].shift(test_n))

    accessor_values = df['a'].vbt.fshift(test_n).values
    kernel_values = nb.fshift_1d_nb(df['a'].values, test_n)
    np.testing.assert_array_equal(accessor_values, kernel_values)

    actual_df = df.vbt.fshift(test_n)
    pd.testing.assert_frame_equal(actual_df, df.shift(test_n))

    # A custom fill value keeps the integer / boolean dtype of the input.
    shifted_ints = pd.Series([1, 2, 3]).vbt.fshift(1, fill_value=-1)
    pd.testing.assert_series_equal(shifted_ints, pd.Series([-1, 1, 2]))

    shifted_bools = pd.Series([True, True, True]).vbt.fshift(1, fill_value=False)
    pd.testing.assert_series_equal(shifted_bools, pd.Series([False, True, True]))
228
229
def test_diff(self):
    """`vbt.diff` must match pandas `diff` and the `diff_1d_nb` kernel."""
    actual_sr = df['a'].vbt.diff()
    pd.testing.assert_series_equal(actual_sr, df['a'].diff())

    kernel_values = nb.diff_1d_nb(df['a'].values)
    np.testing.assert_array_equal(df['a'].vbt.diff().values, kernel_values)

    actual_df = df.vbt.diff()
    pd.testing.assert_frame_equal(actual_df, df.diff())
233
234
def test_pct_change(self):
    """`vbt.pct_change` must match pandas `pct_change(fill_method=None)` and the numba kernel."""
    actual_sr = df['a'].vbt.pct_change()
    pd.testing.assert_series_equal(actual_sr, df['a'].pct_change(fill_method=None))

    kernel_values = nb.pct_change_1d_nb(df['a'].values)
    np.testing.assert_array_equal(df['a'].vbt.pct_change().values, kernel_values)

    actual_df = df.vbt.pct_change()
    pd.testing.assert_frame_equal(actual_df, df.pct_change(fill_method=None))
238
239
def test_bfill(self):
    """Backward fill through the accessor must match pandas `bfill`."""
    actual_sr = df['b'].vbt.bfill()
    pd.testing.assert_series_equal(actual_sr, df['b'].bfill())

    actual_df = df.vbt.bfill()
    pd.testing.assert_frame_equal(actual_df, df.bfill())
242
243
def test_ffill(self):
    """Forward fill through the accessor must match pandas `ffill`."""
    actual_sr = df['a'].vbt.ffill()
    pd.testing.assert_series_equal(actual_sr, df['a'].ffill())

    actual_df = df.vbt.ffill()
    pd.testing.assert_frame_equal(actual_df, df.ffill())
246
247
def test_product(self):
    """Product through the accessor must match pandas `product`."""
    column_product = df['a'].vbt.product()
    assert column_product == df['a'].product()

    frame_product = df.vbt.product()
    np.testing.assert_array_equal(frame_product, df.product())
250
251
def test_cumsum(self):
    """`vbt.cumsum` must equal pandas `cumsum` with gaps forward-filled and leading NaN set to 0."""
    expected_sr = df['a'].cumsum().ffill().fillna(0)
    pd.testing.assert_series_equal(df['a'].vbt.cumsum(), expected_sr)

    expected_df = df.cumsum().ffill().fillna(0)
    pd.testing.assert_frame_equal(df.vbt.cumsum(), expected_df)
254
255
def test_cumprod(self):
    """`vbt.cumprod` must equal pandas `cumprod` with gaps forward-filled and leading NaN set to 1."""
    expected_sr = df['a'].cumprod().ffill().fillna(1)
    pd.testing.assert_series_equal(df['a'].vbt.cumprod(), expected_sr)

    expected_df = df.cumprod().ffill().fillna(1)
    pd.testing.assert_frame_equal(df.vbt.cumprod(), expected_df)
258
259
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_min(self, test_window, test_minp):
    """Rolling minimum must match pandas for explicit and default `minp`."""
    # A missing `minp` means "require a full window".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.rolling_min(test_window, minp=minp)
    expected_sr = df['a'].rolling(test_window, min_periods=minp).min()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.rolling_min(test_window, minp=minp)
    expected_df = df.rolling(test_window, min_periods=minp).min()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.rolling_min(test_window),
        df.rolling(test_window).min()
    )
278
279
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_max(self, test_window, test_minp):
    """Rolling maximum must match pandas for explicit and default `minp`."""
    # A missing `minp` means "require a full window".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.rolling_max(test_window, minp=minp)
    expected_sr = df['a'].rolling(test_window, min_periods=minp).max()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.rolling_max(test_window, minp=minp)
    expected_df = df.rolling(test_window, min_periods=minp).max()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.rolling_max(test_window),
        df.rolling(test_window).max()
    )
298
299
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_mean(self, test_window, test_minp):
    """Rolling mean must match pandas for explicit and default `minp`."""
    # A missing `minp` means "require a full window".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.rolling_mean(test_window, minp=minp)
    expected_sr = df['a'].rolling(test_window, min_periods=minp).mean()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.rolling_mean(test_window, minp=minp)
    expected_df = df.rolling(test_window, min_periods=minp).mean()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.rolling_mean(test_window),
        df.rolling(test_window).mean()
    )
318
319
@pytest.mark.parametrize(
    "test_window,test_minp,test_ddof",
    list(product([1, 2, 3, 4, 5], [1, None], [0, 1]))
)
def test_rolling_std(self, test_window, test_minp, test_ddof):
    """Rolling standard deviation must match pandas for each window, `minp` and `ddof`."""
    # A missing `minp` means "require a full window".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.rolling_std(test_window, minp=minp, ddof=test_ddof)
    expected_sr = df['a'].rolling(test_window, min_periods=minp).std(ddof=test_ddof)
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.rolling_std(test_window, minp=minp, ddof=test_ddof)
    expected_df = df.rolling(test_window, min_periods=minp).std(ddof=test_ddof)
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.rolling_std(test_window),
        df.rolling(test_window).std()
    )
338
339
@pytest.mark.parametrize(
    "test_window,test_minp,test_adjust",
    list(product([1, 2, 3, 4, 5], [1, None], [False, True]))
)
def test_ewm_mean(self, test_window, test_minp, test_adjust):
    """Exponentially weighted mean must match pandas `ewm(span=...).mean()`."""
    # A missing `minp` means "use the span as the minimum number of periods".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.ewm_mean(test_window, minp=minp, adjust=test_adjust)
    expected_sr = df['a'].ewm(span=test_window, min_periods=minp, adjust=test_adjust).mean()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.ewm_mean(test_window, minp=minp, adjust=test_adjust)
    expected_df = df.ewm(span=test_window, min_periods=minp, adjust=test_adjust).mean()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.ewm_mean(test_window),
        df.ewm(span=test_window).mean()
    )
358
359
@pytest.mark.parametrize(
    "test_window,test_minp,test_adjust",
    list(product([1, 2, 3, 4, 5], [1, None], [False, True]))
)
def test_ewm_std(self, test_window, test_minp, test_adjust):
    """Exponentially weighted standard deviation must match pandas `ewm(span=...).std()`."""
    # A missing `minp` means "use the span as the minimum number of periods".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.ewm_std(test_window, minp=minp, adjust=test_adjust)
    expected_sr = df['a'].ewm(span=test_window, min_periods=minp, adjust=test_adjust).std()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.ewm_std(test_window, minp=minp, adjust=test_adjust)
    expected_df = df.ewm(span=test_window, min_periods=minp, adjust=test_adjust).std()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.ewm_std(test_window),
        df.ewm(span=test_window).std()
    )
378
379
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_min(self, test_minp):
    """Expanding minimum must match pandas for explicit and default `minp`."""
    actual_sr = df['a'].vbt.expanding_min(minp=test_minp)
    expected_sr = df['a'].expanding(min_periods=test_minp).min()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.expanding_min(minp=test_minp)
    expected_df = df.expanding(min_periods=test_minp).min()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(df.vbt.expanding_min(), df.expanding().min())
396
397
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_max(self, test_minp):
    """Expanding maximum must match pandas for explicit and default `minp`."""
    actual_sr = df['a'].vbt.expanding_max(minp=test_minp)
    expected_sr = df['a'].expanding(min_periods=test_minp).max()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.expanding_max(minp=test_minp)
    expected_df = df.expanding(min_periods=test_minp).max()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(df.vbt.expanding_max(), df.expanding().max())
414
415
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_mean(self, test_minp):
    """Expanding mean must match pandas for explicit and default `minp`."""
    actual_sr = df['a'].vbt.expanding_mean(minp=test_minp)
    expected_sr = df['a'].expanding(min_periods=test_minp).mean()
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.expanding_mean(minp=test_minp)
    expected_df = df.expanding(min_periods=test_minp).mean()
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(df.vbt.expanding_mean(), df.expanding().mean())
432
433
@pytest.mark.parametrize(
    "test_minp,test_ddof",
    list(product([1, 3], [0, 1]))
)
def test_expanding_std(self, test_minp, test_ddof):
    """Expanding standard deviation must match pandas for each `minp` and `ddof`."""
    actual_sr = df['a'].vbt.expanding_std(minp=test_minp, ddof=test_ddof)
    expected_sr = df['a'].expanding(min_periods=test_minp).std(ddof=test_ddof)
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.expanding_std(minp=test_minp, ddof=test_ddof)
    expected_df = df.expanding(min_periods=test_minp).std(ddof=test_ddof)
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(df.vbt.expanding_std(), df.expanding().std())
450
451
def test_apply_along_axis(self):
    """`apply_along_axis` must match pandas raw `apply` along both axes."""
    for axis in (0, 1):
        actual = df.vbt.apply_along_axis(i_or_col_pow_nb, 2, axis=axis)
        expected = df.apply(pow_nb, args=(2,), axis=axis, raw=True)
        pd.testing.assert_frame_equal(actual, expected)
460
461
@pytest.mark.parametrize(
    "test_window,test_minp",
    list(product([1, 2, 3, 4, 5], [1, None]))
)
def test_rolling_apply(self, test_window, test_minp):
    """Rolling apply must match pandas per column and the recorded values on the whole matrix."""
    # A missing `minp` means "require a full window".
    minp = test_window if test_minp is None else test_minp

    actual_sr = df['a'].vbt.rolling_apply(test_window, i_col_nanmean_nb, minp=minp)
    expected_sr = df['a'].rolling(test_window, min_periods=minp).apply(nanmean_nb, raw=True)
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.rolling_apply(test_window, i_col_nanmean_nb, minp=minp)
    expected_df = df.rolling(test_window, min_periods=minp).apply(nanmean_nb, raw=True)
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.rolling_apply(test_window, i_col_nanmean_nb),
        df.rolling(test_window).apply(nanmean_nb, raw=True)
    )

    # With `on_matrix=True` each row holds one value repeated across all columns.
    row_values = (np.nan, np.nan, np.nan, 2.75, np.nan)
    expected_matrix = pd.DataFrame(
        np.array([[value, value, value] for value in row_values]),
        index=df.index,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.rolling_apply(3, i_nanmean_nb, on_matrix=True),
        expected_matrix
    )
494
495
@pytest.mark.parametrize(
    "test_minp",
    [1, 3]
)
def test_expanding_apply(self, test_minp):
    """Expanding apply must match pandas per column and the recorded values on the whole matrix."""
    actual_sr = df['a'].vbt.expanding_apply(i_col_nanmean_nb, minp=test_minp)
    expected_sr = df['a'].expanding(min_periods=test_minp).apply(nanmean_nb, raw=True)
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.expanding_apply(i_col_nanmean_nb, minp=test_minp)
    expected_df = df.expanding(min_periods=test_minp).apply(nanmean_nb, raw=True)
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Defaults on both sides.
    pd.testing.assert_frame_equal(
        df.vbt.expanding_apply(i_col_nanmean_nb),
        df.expanding().apply(nanmean_nb, raw=True)
    )

    # With `on_matrix=True` each row holds one value repeated across all columns.
    row_values = (np.nan, 2.0, 2.2857142857142856, 2.4, 2.1666666666666665)
    expected_matrix = pd.DataFrame(
        np.array([[value, value, value] for value in row_values]),
        index=df.index,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.expanding_apply(i_nanmean_nb, on_matrix=True),
        expected_matrix
    )
526
527
def test_groupby_apply(self):
    """Group-wise apply over row labels must match pandas `groupby` with the same reducer."""
    row_groups = np.asarray([1, 1, 2, 2, 3])

    actual_sr = df['a'].vbt.groupby_apply(row_groups, i_col_nanmean_nb)
    expected_sr = df['a'].groupby(row_groups).apply(lambda x: nanmean_nb(x.values))
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.groupby_apply(row_groups, i_col_nanmean_nb)
    # any clean way to do column-wise grouping in pandas?
    expected_df = df.groupby(row_groups).agg({
        column: (lambda x: nanmean_nb(x.values)) for column in ('a', 'b', 'c')
    })
    pd.testing.assert_frame_equal(actual_df, expected_df)
540
541
def test_groupby_apply_on_matrix(self):
    """Group-wise apply with `on_matrix=True` must yield one value per group, repeated per column."""
    actual = df.vbt.groupby_apply(np.asarray([1, 1, 2, 2, 3]), i_nanmean_nb, on_matrix=True)
    expected = pd.DataFrame(
        np.array([[value, value, value] for value in (2., 2.8, 1.)]),
        index=pd.Index([1, 2, 3], dtype='int64'),
        columns=df.columns
    )
    pd.testing.assert_frame_equal(actual, expected)
554
555
@pytest.mark.parametrize(
    "test_freq",
    ['1h', '3d', '1w'],
)
def test_resample_apply(self, test_freq):
    """Resample-apply must match pandas `resample().apply()` and the recorded matrix result."""
    actual_sr = df['a'].vbt.resample_apply(test_freq, i_col_nanmean_nb)
    expected_sr = df['a'].resample(test_freq).apply(lambda x: nanmean_nb(x.values))
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    actual_df = df.vbt.resample_apply(test_freq, i_col_nanmean_nb)
    expected_df = df.resample(test_freq).apply(lambda x: nanmean_nb(x.values))
    pd.testing.assert_frame_equal(actual_df, expected_df)

    # Fixed 3-day frequency on the whole matrix: one value per bucket, repeated per column.
    expected_matrix = pd.DataFrame(
        np.array([[value, value, value] for value in (2.28571429, 2.)]),
        index=pd.DatetimeIndex(['2018-01-01', '2018-01-04'], dtype='datetime64[ns]', freq='3D'),
        columns=df.columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.resample_apply('3d', i_nanmean_nb, on_matrix=True),
        expected_matrix
    )
579
580
def test_applymap(self):
    """Element-wise `vbt.applymap` must match pandas element-wise mapping."""
    @njit
    def mult_nb(i, col, x):
        # Doubles the element; the row and column positions are unused.
        return x * 2

    pd.testing.assert_series_equal(
        df['a'].vbt.applymap(mult_nb),
        df['a'].map(lambda x: x * 2)
    )

    # `DataFrame.applymap` was renamed to `DataFrame.map` in pandas 2.1 and the
    # old name is deprecated; use the new name when this pandas provides it.
    elementwise = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
    pd.testing.assert_frame_equal(
        df.vbt.applymap(mult_nb),
        elementwise(lambda x: x * 2)
    )
593
594
def test_filter(self):
    """`vbt.filter` must keep elements passing the predicate and turn the rest into NaN."""
    @njit
    def greater_nb(i, col, x):
        # Predicate on the element only; the row and column positions are unused.
        return x > 2

    pd.testing.assert_series_equal(
        df['a'].vbt.filter(greater_nb),
        df['a'].map(lambda x: x if x > 2 else np.nan)
    )

    # `DataFrame.applymap` was renamed to `DataFrame.map` in pandas 2.1 and the
    # old name is deprecated; use the new name when this pandas provides it.
    elementwise = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
    pd.testing.assert_frame_equal(
        df.vbt.filter(greater_nb),
        elementwise(lambda x: x if x > 2 else np.nan)
    )
607
608
def test_apply_and_reduce(self):
    """Apply-then-reduce must equal summing every second row and adding 3."""
    @njit
    def every_nth_nb(col, a, n):
        return a[::n]

    @njit
    def sum_nb(col, a, b):
        return np.nansum(a) + b

    reduced_column = df['a'].vbt.apply_and_reduce(
        every_nth_nb, sum_nb, apply_args=(2,), reduce_args=(3,))
    assert reduced_column == df['a'].iloc[::2].sum() + 3

    reduced_frame = df.vbt.apply_and_reduce(
        every_nth_nb, sum_nb, apply_args=(2,), reduce_args=(3,))
    expected = df.iloc[::2].sum().rename('apply_and_reduce') + 3
    pd.testing.assert_series_equal(reduced_frame, expected)

    # Same reduction, wrapped as timedeltas.
    reduced_timedelta = df.vbt.apply_and_reduce(
        every_nth_nb, sum_nb, apply_args=(2,),
        reduce_args=(3,), wrap_kwargs=dict(to_timedelta=True))
    expected_timedelta = (df.iloc[::2].sum().rename('apply_and_reduce') + 3) * day_dt
    pd.testing.assert_series_equal(reduced_timedelta, expected_timedelta)
629
630
def test_reduce(self):
    """Exercise `vbt.reduce` with scalar, index, array and index-array reducers, with and without grouping."""
    # --- reducer returning a scalar ---
    @njit
    def sum_nb(col, a):
        return np.nansum(a)

    assert df['a'].vbt.reduce(sum_nb) == df['a'].sum()

    summed = df.vbt.reduce(sum_nb)
    pd.testing.assert_series_equal(summed, df.sum().rename('reduce'))

    summed_timedelta = df.vbt.reduce(sum_nb, wrap_kwargs=dict(to_timedelta=True))
    pd.testing.assert_series_equal(summed_timedelta, df.sum().rename('reduce') * day_dt)

    summed_grouped = df.vbt.reduce(sum_nb, group_by=group_by)
    pd.testing.assert_series_equal(
        summed_grouped,
        pd.Series([20.0, 6.0], index=['g1', 'g2']).rename('reduce')
    )

    # --- reducer returning a position that gets mapped to an index label ---
    @njit
    def argmax_nb(col, a):
        a = a.copy()
        a[np.isnan(a)] = -np.inf
        return np.argmax(a)

    assert df['a'].vbt.reduce(argmax_nb, returns_idx=True) == df['a'].idxmax()

    idxmax_frame = df.vbt.reduce(argmax_nb, returns_idx=True)
    pd.testing.assert_series_equal(idxmax_frame, df.idxmax().rename('reduce'))

    idxmax_grouped = df.vbt.reduce(argmax_nb, returns_idx=True, flatten=True, group_by=group_by)
    pd.testing.assert_series_equal(
        idxmax_grouped,
        pd.Series(['2018-01-02', '2018-01-02'], dtype='datetime64[ns]', index=['g1', 'g2']).rename('reduce')
    )

    # --- reducer returning an array of values ---
    @njit
    def min_and_max_nb(col, a):
        out = np.empty(2)
        out[0] = np.nanmin(a)
        out[1] = np.nanmax(a)
        return out

    min_max_labels = ['min', 'max']

    min_max_column = df['a'].vbt.reduce(
        min_and_max_nb, returns_array=True,
        wrap_kwargs=dict(name_or_index=['min', 'max']))
    pd.testing.assert_series_equal(
        min_max_column,
        pd.Series([np.nanmin(df['a']), np.nanmax(df['a'])], index=min_max_labels, name='a')
    )

    min_max_frame = df.vbt.reduce(
        min_and_max_nb, returns_array=True,
        wrap_kwargs=dict(name_or_index=['min', 'max']))
    pd.testing.assert_frame_equal(
        min_max_frame,
        df.apply(lambda x: pd.Series(np.asarray([np.nanmin(x), np.nanmax(x)]), index=['min', 'max']), axis=0)
    )

    min_max_grouped = df.vbt.reduce(
        min_and_max_nb, returns_array=True, group_by=group_by,
        wrap_kwargs=dict(name_or_index=['min', 'max']))
    pd.testing.assert_frame_equal(
        min_max_grouped,
        pd.DataFrame([[1.0, 1.0], [4.0, 2.0]], index=min_max_labels, columns=['g1', 'g2'])
    )

    # --- reducer returning an array of positions ---
    @njit
    def argmin_and_argmax_nb(col, a):
        # nanargmin and nanargmax
        out = np.empty(2)
        _a = a.copy()
        _a[np.isnan(_a)] = np.inf
        out[0] = np.argmin(_a)
        _a = a.copy()
        _a[np.isnan(_a)] = -np.inf
        out[1] = np.argmax(_a)
        return out

    idx_labels = ['idxmin', 'idxmax']

    idx_column = df['a'].vbt.reduce(
        argmin_and_argmax_nb, returns_idx=True, returns_array=True,
        wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax']))
    pd.testing.assert_series_equal(
        idx_column,
        pd.Series([df['a'].idxmin(), df['a'].idxmax()], index=idx_labels, name='a')
    )

    idx_frame = df.vbt.reduce(
        argmin_and_argmax_nb, returns_idx=True, returns_array=True,
        wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax']))
    pd.testing.assert_frame_equal(
        idx_frame,
        df.apply(lambda x: pd.Series(np.asarray([x.idxmin(), x.idxmax()]), index=['idxmin', 'idxmax']), axis=0)
    )

    # Grouped and flattened in row-major ('C') order.
    idx_grouped_c = df.vbt.reduce(
        argmin_and_argmax_nb, returns_idx=True, returns_array=True,
        flatten=True, order='C', group_by=group_by,
        wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax']))
    pd.testing.assert_frame_equal(
        idx_grouped_c,
        pd.DataFrame([['2018-01-01', '2018-01-01'], ['2018-01-02', '2018-01-02']],
                     dtype='datetime64[ns]', index=idx_labels, columns=['g1', 'g2'])
    )

    # Grouped and flattened in column-major ('F') order.
    idx_grouped_f = df.vbt.reduce(
        argmin_and_argmax_nb, returns_idx=True, returns_array=True,
        flatten=True, order='F', group_by=group_by,
        wrap_kwargs=dict(name_or_index=['idxmin', 'idxmax']))
    pd.testing.assert_frame_equal(
        idx_grouped_f,
        pd.DataFrame([['2018-01-01', '2018-01-01'], ['2018-01-04', '2018-01-02']],
                     dtype='datetime64[ns]', index=idx_labels, columns=['g1', 'g2'])
    )
729
730
def test_squeeze_grouped(self):
    """Squeezing each column group into one column must give the recorded row-wise means."""
    squeezed = df.vbt.squeeze_grouped(i_col_nanmean_nb, group_by=group_by)
    expected = pd.DataFrame(
        [
            [1.0, 1.0],
            [3.0, 2.0],
            [3.0, np.nan],
            [3.0, 2.0],
            [1.0, 1.0]
        ],
        index=df.index,
        columns=['g1', 'g2']
    )
    pd.testing.assert_frame_equal(squeezed, expected)

    # A single column placed in one group squeezes to a scalar.
    squeezed_column = df['a'].vbt.squeeze_grouped(i_col_nanmean_nb, group_by=True)
    assert squeezed_column == 2.5
742
743
def test_flatten_grouped(self):
    """Flattening column groups must give the recorded layout for both 'C' and 'F' order."""
    group_columns = ['g1', 'g2']

    # 'C' order: the index labels repeat pairwise (`np.repeat`).
    expected_c = pd.DataFrame(
        [
            [1.0, 1.0],
            [np.nan, np.nan],
            [2.0, 2.0],
            [4.0, np.nan],
            [3.0, np.nan],
            [3.0, np.nan],
            [4.0, 2.0],
            [2.0, np.nan],
            [np.nan, 1.0],
            [1.0, np.nan]
        ],
        index=np.repeat(df.index, 2),
        columns=group_columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.flatten_grouped(group_by=group_by, order='C'),
        expected_c
    )

    # 'F' order: the whole index is repeated back to back (`np.tile`).
    expected_f = pd.DataFrame(
        [
            [1.0, 1.0],
            [2.0, 2.0],
            [3.0, np.nan],
            [4.0, 2.0],
            [np.nan, 1.0],
            [np.nan, np.nan],
            [4.0, np.nan],
            [3.0, np.nan],
            [2.0, np.nan],
            [1.0, np.nan]
        ],
        index=np.tile(df.index, 2),
        columns=group_columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.flatten_grouped(group_by=group_by, order='F'),
        expected_f
    )

    # Boolean frame with all columns in one group flattens to a boolean series.
    flat_bool_c = pd.DataFrame([[False, True], [False, True]]).vbt.flatten_grouped(group_by=True, order='C')
    pd.testing.assert_series_equal(
        flat_bool_c,
        pd.Series([False, True, False, True], name='group')
    )
    flat_bool_f = pd.DataFrame([[False, True], [False, True]]).vbt.flatten_grouped(group_by=True, order='F')
    pd.testing.assert_series_equal(
        flat_bool_f,
        pd.Series([False, False, True, True], name='group')
    )

    # NOTE(review): `group_by=[0, 0, 0, 1]` has four entries for a Series input, and
    # the expected frame has only three rows — worth confirming this is intended.
    flat_uneven = pd.Series([False, True, True, False]).vbt.flatten_grouped(group_by=[0, 0, 0, 1])
    pd.testing.assert_frame_equal(
        flat_uneven,
        pd.DataFrame([[0., 0.], [1., np.nan], [1., np.nan]], columns=pd.Index([0, 1], dtype='int64'))
    )
786
787
@pytest.mark.parametrize(
    "test_name,test_func,test_func_nb",
    [
        ('min', lambda x, **kwargs: x.min(**kwargs), nb.nanmin_nb),
        ('max', lambda x, **kwargs: x.max(**kwargs), nb.nanmax_nb),
        ('mean', lambda x, **kwargs: x.mean(**kwargs), nb.nanmean_nb),
        ('median', lambda x, **kwargs: x.median(**kwargs), nb.nanmedian_nb),
        ('std', lambda x, **kwargs: x.std(**kwargs, ddof=0), nb.nanstd_nb),
        ('count', lambda x, **kwargs: x.count(**kwargs), nb.nancnt_nb),
        ('sum', lambda x, **kwargs: x.sum(**kwargs), nb.nansum_nb)
    ],
)
def test_funcs(self, test_name, test_func, test_func_nb):
    """Each reducing method must agree with pandas, the numba kernel and grouped pandas results."""
    # numeric
    column_result = test_func(df['a'].vbt)
    assert column_result == test_func(df['a'])

    frame_result = test_func(df.vbt)
    pd.testing.assert_series_equal(frame_result, test_func(df).rename(test_name))

    # Group 'g1' pools columns 'a' and 'b'; group 'g2' is column 'c' alone.
    grouped_result = test_func(df.vbt, group_by=group_by)
    grouped_expected = pd.Series(
        [
            test_func(df[['a', 'b']].stack()),
            test_func(df['c'])
        ],
        index=['g1', 'g2']
    ).rename(test_name)
    pd.testing.assert_series_equal(grouped_result, grouped_expected)

    np.testing.assert_array_equal(test_func(df).values, test_func_nb(df.values))

    timedelta_result = test_func(df.vbt, wrap_kwargs=dict(to_timedelta=True))
    pd.testing.assert_series_equal(timedelta_result, test_func(df).rename(test_name) * day_dt)

    # boolean
    bool_ts = df == df
    bool_column_result = test_func(bool_ts['a'].vbt)
    assert bool_column_result == test_func(bool_ts['a'])

    bool_frame_result = test_func(bool_ts.vbt)
    pd.testing.assert_series_equal(bool_frame_result, test_func(bool_ts).rename(test_name))

    bool_timedelta_result = test_func(bool_ts.vbt, wrap_kwargs=dict(to_timedelta=True))
    pd.testing.assert_series_equal(
        bool_timedelta_result,
        test_func(bool_ts).rename(test_name) * day_dt
    )
829
830
@pytest.mark.parametrize(
    "test_name,test_func",
    [
        ('idxmin', lambda x, **kwargs: x.idxmin(**kwargs)),
        ('idxmax', lambda x, **kwargs: x.idxmax(**kwargs))
    ],
)
def test_arg_funcs(self, test_name, test_func):
    """`idxmin` / `idxmax` through the accessor must agree with pandas, including grouped columns."""
    column_result = test_func(df['a'].vbt)
    assert column_result == test_func(df['a'])

    frame_result = test_func(df.vbt)
    pd.testing.assert_series_equal(frame_result, test_func(df).rename(test_name))

    # The stacked frame is labelled by (timestamp, column) pairs; element 0 is the timestamp.
    stacked_label = test_func(df[['a', 'b']].stack())
    grouped_expected = pd.Series(
        [
            stacked_label[0],
            test_func(df['c'])
        ],
        index=['g1', 'g2'],
        dtype='datetime64[ns]'
    ).rename(test_name)
    pd.testing.assert_series_equal(
        test_func(df.vbt, group_by=group_by),
        grouped_expected
    )
850
851
def test_describe(self):
    """`vbt.describe` must match pandas `describe` for several percentile settings and for groups."""
    pd.testing.assert_series_equal(df['a'].vbt.describe(), df['a'].describe())

    for percentiles in (None, []):
        actual = df.vbt.describe(percentiles=percentiles)
        expected = df.describe(percentiles=percentiles)
        pd.testing.assert_frame_equal(actual, expected)

    pandas_deciles = df.describe(percentiles=np.arange(0, 1, 0.1))
    pd.testing.assert_frame_equal(
        df.vbt.describe(percentiles=np.arange(0, 1, 0.1)),
        pandas_deciles
    )

    # Group 'g1' pools columns 'a' and 'b'; group 'g2' is column 'c' alone.
    grouped_expected = pd.DataFrame(
        {
            'g1': df[['a', 'b']].stack().describe(percentiles=np.arange(0, 1, 0.1)).values,
            'g2': df['c'].describe(percentiles=np.arange(0, 1, 0.1)).values
        },
        index=pandas_deciles.index
    )
    pd.testing.assert_frame_equal(
        df.vbt.describe(percentiles=np.arange(0, 1, 0.1), group_by=group_by),
        grouped_expected
    )
876
877
def test_value_counts(self):
    """`vbt.value_counts` must give the recorded counts for mapping, grouping, sorting and normalising."""
    ones = np.array([1, 1, 1, 1, 1])
    sorted_uniques = pd.Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64')

    # Single column, plain and with values mapped to labels.
    pd.testing.assert_series_equal(
        df['a'].vbt.value_counts(),
        pd.Series(ones, index=sorted_uniques, name='a')
    )
    mapping = {1.: 'one', 2.: 'two', 3.: 'three', 4.: 'four'}
    pd.testing.assert_series_equal(
        df['a'].vbt.value_counts(mapping=mapping),
        pd.Series(
            np.array([1, 1, 1, 1, 1]),
            index=pd.Index(['one', 'two', 'three', 'four', None], dtype='object'),
            name='a'
        )
    )

    # Whole frame with default arguments.
    expected_default = pd.DataFrame(
        np.array([
            [1, 1, 2],
            [1, 1, 2],
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1]
        ]),
        index=sorted_uniques,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(df.vbt.value_counts(), expected_default)

    # Column groups.
    expected_grouped = pd.DataFrame(
        np.array([
            [2, 2],
            [2, 2],
            [2, 0],
            [2, 0],
            [2, 1]
        ]),
        index=sorted_uniques,
        columns=pd.Index(['g1', 'g2'], dtype='object')
    )
    pd.testing.assert_frame_equal(df.vbt.value_counts(group_by=group_by), expected_grouped)

    # Unique values left unsorted.
    expected_unsorted = pd.DataFrame(
        np.array([
            [1, 1, 2],
            [1, 1, 2],
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1]
        ]),
        index=pd.Index([1.0, 2.0, 4.0, 3.0, np.nan], dtype='float64'),
        columns=df.columns
    )
    pd.testing.assert_frame_equal(df.vbt.value_counts(sort_uniques=False), expected_unsorted)

    # Rows sorted by count.
    by_count_index = pd.Index([1.0, 2.0, np.nan, 3.0, 4.0], dtype='float64')
    expected_sorted = pd.DataFrame(
        np.array([
            [1, 1, 2],
            [1, 1, 2],
            [1, 1, 1],
            [1, 1, 0],
            [1, 1, 0]
        ]),
        index=by_count_index,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(df.vbt.value_counts(sort=True), expected_sorted)

    expected_ascending = pd.DataFrame(
        np.array([
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1],
            [1, 1, 2],
            [1, 1, 2]
        ]),
        index=pd.Index([3.0, 4.0, np.nan, 1.0, 2.0], dtype='float64'),
        columns=df.columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True, ascending=True),
        expected_ascending
    )

    # Normalised counts, with and without the NaN row.
    expected_normalized = pd.DataFrame(
        np.array([
            [0.06666666666666667, 0.06666666666666667, 0.13333333333333333],
            [0.06666666666666667, 0.06666666666666667, 0.13333333333333333],
            [0.06666666666666667, 0.06666666666666667, 0.06666666666666667],
            [0.06666666666666667, 0.06666666666666667, 0.0],
            [0.06666666666666667, 0.06666666666666667, 0.0]
        ]),
        index=by_count_index,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True, normalize=True),
        expected_normalized
    )

    expected_normalized_dropna = pd.DataFrame(
        np.array([
            [0.08333333333333333, 0.08333333333333333, 0.16666666666666666],
            [0.08333333333333333, 0.08333333333333333, 0.16666666666666666],
            [0.08333333333333333, 0.08333333333333333, 0.0],
            [0.08333333333333333, 0.08333333333333333, 0.0]
        ]),
        index=pd.Index([1.0, 2.0, 3.0, 4.0], dtype='float64'),
        columns=df.columns
    )
    pd.testing.assert_frame_equal(
        df.vbt.value_counts(sort=True, normalize=True, dropna=True),
        expected_normalized_dropna
    )
992
993
def test_drawdown(self):
    """Drawdown must equal the value divided by its expanding maximum, minus one."""
    expected_sr = df['a'] / df['a'].expanding().max() - 1
    pd.testing.assert_series_equal(df['a'].vbt.drawdown(), expected_sr)

    expected_df = df / df.expanding().max() - 1
    pd.testing.assert_frame_equal(df.vbt.drawdown(), expected_df)
1002
1003
def test_drawdowns(self):
    """The `drawdowns` property must return a `vbt.Drawdowns` whose wrapper mirrors the source."""
    column_accessor = df['a'].vbt

    assert type(column_accessor.drawdowns) is vbt.Drawdowns
    assert column_accessor.drawdowns.wrapper.freq == column_accessor.wrapper.freq
    assert column_accessor.drawdowns.wrapper.ndim == df['a'].ndim
    assert df.vbt.drawdowns.wrapper.ndim == df.ndim
1008
1009
def test_to_mapped(self):
    """`to_mapped` must give the recorded values, column ids and row ids with and without NaN dropping."""
    # Default call: the recorded arrays contain no NaN entries.
    mapped = df.vbt.to_mapped()
    np.testing.assert_array_equal(
        mapped.values,
        np.array([1., 2., 3., 4., 4., 3., 2., 1., 1., 2., 2., 1.])
    )
    np.testing.assert_array_equal(
        mapped.col_arr,
        np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
    )
    np.testing.assert_array_equal(
        mapped.idx_arr,
        np.array([0, 1, 2, 3, 1, 2, 3, 4, 0, 1, 3, 4])
    )

    # `dropna=False`: every cell is kept, column by column.
    mapped_all = df.vbt.to_mapped(dropna=False)
    np.testing.assert_array_equal(
        mapped_all.values,
        np.array([1., 2., 3., 4., np.nan, np.nan, 4., 3., 2., 1., 1., 2., np.nan, 2., 1.])
    )
    np.testing.assert_array_equal(
        mapped_all.col_arr,
        np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2])
    )
    np.testing.assert_array_equal(
        mapped_all.idx_arr,
        np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
    )
1034
1035
def test_zscore(self):
    """Check `zscore` against (x - mean) / std computed with `ddof=0`."""
    # Series accessor.
    sr = df['a']
    expected_sr = (sr - sr.mean()) / sr.std(ddof=0)
    pd.testing.assert_series_equal(sr.vbt.zscore(), expected_sr)

    # DataFrame accessor: the same formula applied per column.
    centered = df - df.mean()
    expected_df = centered / df.std(ddof=0)
    pd.testing.assert_frame_equal(df.vbt.zscore(), expected_df)
1044
1045
def test_split(self):
    """Check `split` driven by a scikit-learn `TimeSeriesSplit(n_splits=2)`.

    The call is unpacked into a train set and a test set, each consisting of a
    frame and a list of per-split datetime indexes. Both parts are verified,
    first for the Series `df['a']` and then for the whole frame.
    """
    d1, d2, d3, d4, d5 = '2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05'

    def targets(*day_groups):
        # One expected index per split, named 'split_0', 'split_1', ...
        return [
            pd.DatetimeIndex(list(days), dtype='datetime64[ns]', name='split_' + str(pos), freq=None)
            for pos, days in enumerate(day_groups)
        ]

    def expected_frame(rows, columns):
        # Expected frames carry a positional index with one entry per row.
        return pd.DataFrame(
            np.array(rows),
            index=pd.RangeIndex(start=0, stop=len(rows), step=1),
            columns=columns
        )

    def check_indexes(actual, expected):
        for pos, expected_index in enumerate(expected):
            pd.testing.assert_index_equal(actual[pos], expected_index)

    splitter = TimeSeriesSplit(n_splits=2)

    # --- Series ---
    sr_columns = pd.Index([0, 1], dtype='int64', name='split_idx')
    (train_df, train_indexes), (test_df, test_indexes) = df['a'].vbt.split(splitter)
    pd.testing.assert_frame_equal(
        train_df,
        expected_frame([
            [1.0, 1.0],
            [2.0, 2.0],
            [3.0, 3.0],
            [np.nan, 4.0]
        ], sr_columns)
    )
    check_indexes(train_indexes, targets((d1, d2, d3), (d1, d2, d3, d4)))
    pd.testing.assert_frame_equal(
        test_df,
        expected_frame([[4.0, np.nan]], sr_columns)
    )
    check_indexes(test_indexes, targets((d4,), (d5,)))

    # --- DataFrame: columns become (split_idx, original column) pairs ---
    df_columns = pd.MultiIndex.from_tuples(
        [(split_no, col) for split_no in (0, 1) for col in ('a', 'b', 'c')],
        names=['split_idx', None]
    )
    (train_df, train_indexes), (test_df, test_indexes) = df.vbt.split(splitter)
    pd.testing.assert_frame_equal(
        train_df,
        expected_frame([
            [1.0, np.nan, 1.0, 1.0, np.nan, 1.0],
            [2.0, 4.0, 2.0, 2.0, 4.0, 2.0],
            [3.0, 3.0, np.nan, 3.0, 3.0, np.nan],
            [np.nan, np.nan, np.nan, 4.0, 2.0, 2.0]
        ], df_columns)
    )
    check_indexes(train_indexes, targets((d1, d2, d3), (d1, d2, d3, d4)))
    pd.testing.assert_frame_equal(
        test_df,
        expected_frame([[4.0, 2.0, 2.0, np.nan, 1.0, 1.0]], df_columns)
    )
    check_indexes(test_indexes, targets((d4,), (d5,)))
1149
1150
def test_range_split(self):
    """Check `range_split` for each way of specifying the ranges.

    On the Series `df['a']`: `n`, `range_len`, and both combined.
    On the whole frame: `n`, and explicit `start_idxs`/`end_idxs` passed as
    integer positions and as slices of `df.index` (including one start paired
    with two ends). Finally, a list of calls that are expected to raise.

    Each case is split exactly once and both parts of that single result are
    verified - the frame at position 0 and the per-split indexes at
    position 1 - instead of re-running the split for every comparison.
    """
    d1, d2, d3, d4, d5 = '2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05'

    def targets(*day_groups):
        # One expected index per split, named 'split_0', 'split_1', ...
        return [
            pd.DatetimeIndex(list(days), dtype='datetime64[ns]', name='split_' + str(pos), freq=None)
            for pos, days in enumerate(day_groups)
        ]

    def sr_columns(n_splits):
        # A Series yields one column per split.
        return pd.Index(list(range(n_splits)), dtype='int64', name='split_idx')

    def df_columns():
        # A frame yields the original columns repeated under each split.
        return pd.MultiIndex.from_arrays([
            pd.Index([0, 0, 0, 1, 1, 1], dtype='int64', name='split_idx'),
            pd.Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object')
        ])

    def check_case(obj, kwargs, rows, columns, target):
        result = obj.vbt.range_split(**kwargs)
        pd.testing.assert_frame_equal(
            result[0],
            pd.DataFrame(
                np.array(rows),
                index=pd.RangeIndex(start=0, stop=len(rows), step=1),
                columns=columns
            )
        )
        for pos, expected_index in enumerate(target):
            pd.testing.assert_index_equal(result[1][pos], expected_index)

    # --- Series ---
    sr_cases = [
        (dict(n=2),
         [[1., 4.],
          [2., np.nan]],
         targets((d1, d2), (d4, d5))),
        (dict(range_len=2),
         [[1., 2., 3., 4.],
          [2., 3., 4., np.nan]],
         targets((d1, d2), (d2, d3), (d3, d4), (d4, d5))),
        (dict(range_len=2, n=3),
         [[1., 3., 4.],
          [2., 4., np.nan]],
         targets((d1, d2), (d3, d4), (d4, d5))),
        (dict(range_len=3, n=2),
         [[1., 3.],
          [2., 4.],
          [3., np.nan]],
         targets((d1, d2, d3), (d3, d4, d5))),
    ]
    for kwargs, rows, target in sr_cases:
        check_case(df['a'], kwargs, rows, sr_columns(len(rows[0])), target)

    # --- DataFrame ---
    # Positions and labels describing the same ranges must give the same frame.
    explicit_rows = [
        [1.0, np.nan, 1.0, 2.0, 4.0, 2.0],
        [2.0, 4.0, 2.0, 3.0, 3.0, np.nan],
        [3.0, 3.0, np.nan, 4.0, 2.0, 2.0]
    ]
    df_cases = [
        (dict(n=2),
         [[1.0, np.nan, 1.0, 4.0, 2.0, 2.0],
          [2.0, 4.0, 2.0, np.nan, 1.0, 1.0]],
         targets((d1, d2), (d4, d5))),
        (dict(start_idxs=[0, 1], end_idxs=[2, 3]),
         explicit_rows,
         targets((d1, d2, d3), (d2, d3, d4))),
        (dict(start_idxs=df.index[[0, 1]], end_idxs=df.index[[2, 3]]),
         explicit_rows,
         targets((d1, d2, d3), (d2, d3, d4))),
        (dict(start_idxs=df.index[[0]], end_idxs=df.index[[2, 3]]),
         [[1.0, np.nan, 1.0, 1.0, np.nan, 1.0],
          [2.0, 4.0, 2.0, 2.0, 4.0, 2.0],
          [3.0, 3.0, np.nan, 3.0, 3.0, np.nan],
          [np.nan, np.nan, np.nan, 4.0, 2.0, 2.0]],
         targets((d1, d2, d3), (d1, d2, d3, d4))),
    ]
    for kwargs, rows, target in df_cases:
        check_case(df, kwargs, rows, df_columns(), target)

    # --- Calls expected to be rejected ---
    rejected = (
        dict(),
        dict(start_idxs=[0, 1]),
        dict(end_idxs=[2, 4]),
        dict(min_len=10),
        dict(n=10),
    )
    for kwargs in rejected:
        with pytest.raises(Exception):
            df.vbt.range_split(**kwargs)
1349
1350
def test_rolling_split(self):
    """Check `rolling_split` on the Series `df['a']`.

    Three calls use `window_len=4` with two `set_lens` entries and are
    unpacked into three sets, each a frame plus a list of per-split indexes;
    they differ in `left_to_right` (False, True, and one flag per set length)
    and in whether `set_lens` is given as counts or as fractions. Two further
    calls use `n=2` without `set_lens`, with the window length given as a
    count (2) and as a fraction (0.4), and are unpacked into a single frame
    and index list. Finally, a list of calls that are expected to raise.

    The per-split indexes are verified for every case, including the
    `window_len=2, n=2` one.
    """
    d1, d2, d3, d4, d5 = '2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05'
    split_columns = pd.Index([0, 1], dtype='int64', name='split_idx')

    def targets(*day_groups):
        # One expected index per split, named 'split_0', 'split_1', ...
        return [
            pd.DatetimeIndex(list(days), dtype='datetime64[ns]', name='split_' + str(pos), freq=None)
            for pos, days in enumerate(day_groups)
        ]

    def check_set(set_df, set_indexes, rows, target):
        pd.testing.assert_frame_equal(
            set_df,
            pd.DataFrame(
                np.array(rows),
                index=pd.RangeIndex(start=0, stop=len(rows), step=1),
                columns=split_columns
            )
        )
        for pos, expected_index in enumerate(target):
            pd.testing.assert_index_equal(set_indexes[pos], expected_index)

    # (keyword arguments, [(expected rows, expected indexes) for each of the three sets])
    three_set_cases = [
        (dict(window_len=4, set_lens=(1, 1), left_to_right=False), [
            ([[1.0, 2.0],
              [2.0, 3.0]], targets((d1, d2), (d2, d3))),
            ([[3.0, 4.0]], targets((d3,), (d4,))),
            ([[4.0, np.nan]], targets((d4,), (d5,))),
        ]),
        (dict(window_len=4, set_lens=(1, 1), left_to_right=True), [
            ([[1.0, 2.0]], targets((d1,), (d2,))),
            ([[2.0, 3.0]], targets((d2,), (d3,))),
            ([[3.0, 4.0],
              [4.0, np.nan]], targets((d3, d4), (d4, d5))),
        ]),
        (dict(window_len=4, set_lens=(0.25, 0.25), left_to_right=[False, True]), [
            ([[1.0, 2.0],
              [2.0, np.nan]], targets((d1, d2), (d2,))),
            ([[3.0, 3.0]], targets((d3,), (d3,))),
            ([[4.0, 4.0],
              [np.nan, np.nan]], targets((d4,), (d4, d5))),
        ]),
    ]
    for kwargs, expected_sets in three_set_cases:
        # Unpacking into exactly three names keeps the check on the number of sets.
        set1, set2, set3 = df['a'].vbt.rolling_split(**kwargs)
        for (set_df, set_indexes), (rows, target) in zip((set1, set2, set3), expected_sets):
            check_set(set_df, set_indexes, rows, target)

    # Without `set_lens` the result is a single (frame, indexes) pair.
    # Both window lengths are expected to give the same two windows.
    for window_len in (2, 0.4):
        df1, indexes1 = df['a'].vbt.rolling_split(window_len=window_len, n=2)
        check_set(
            df1,
            indexes1,
            [[1.0, 4.0],
             [2.0, np.nan]],
            targets((d1, d2), (d4, d5))
        )

    # --- Calls expected to be rejected ---
    rejected = (
        dict(),
        dict(window_len=3, set_lens=(3, 1)),
        dict(window_len=1, set_lens=(1, 1)),
        dict(n=2, min_len=10),
        dict(n=10),
    )
    for kwargs in rejected:
        with pytest.raises(Exception):
            df.vbt.rolling_split(**kwargs)
1579
1580
def test_expanding_split(self):
    """Check `expanding_split` on the Series `df['a']`.

    The first call passes two `set_lens` entries and is unpacked into three
    sets, each a frame plus a list of per-split indexes. The second call
    passes only `n` and `min_len` and is unpacked into a single frame and
    index list. Finally, two calls that are expected to raise.
    """
    d1, d2, d3, d4, d5 = '2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05'
    split_columns = pd.Index([0, 1], dtype='int64', name='split_idx')

    def targets(*day_groups):
        # One expected index per split, named 'split_0', 'split_1', ...
        return [
            pd.DatetimeIndex(list(days), dtype='datetime64[ns]', name='split_' + str(pos), freq=None)
            for pos, days in enumerate(day_groups)
        ]

    def check_set(set_df, set_indexes, rows, target):
        pd.testing.assert_frame_equal(
            set_df,
            pd.DataFrame(
                np.array(rows),
                index=pd.RangeIndex(start=0, stop=len(rows), step=1),
                columns=split_columns
            )
        )
        for pos, expected_index in enumerate(target):
            pd.testing.assert_index_equal(set_indexes[pos], expected_index)

    # --- Three sets ---
    (df1, indexes1), (df2, indexes2), (df3, indexes3) = df['a'].vbt.expanding_split(
        min_len=4, set_lens=(1, 1), left_to_right=False)
    check_set(
        df1,
        indexes1,
        [[1.0, 1.0],
         [2.0, 2.0],
         [np.nan, 3.0]],
        targets((d1, d2), (d1, d2, d3))
    )
    check_set(df2, indexes2, [[3.0, 4.0]], targets((d3,), (d4,)))
    check_set(df3, indexes3, [[4.0, np.nan]], targets((d4,), (d5,)))

    # --- Single set: shorter windows are padded with NaN in the frame ---
    df1, indexes1 = df['a'].vbt.expanding_split(n=2, min_len=2)
    check_set(
        df1,
        indexes1,
        [[1.0, 1.0],
         [2.0, 2.0],
         [np.nan, 3.0],
         [np.nan, 4.0],
         [np.nan, np.nan]],
        targets((d1, d2), (d1, d2, d3, d4, d5))
    )

    # --- Calls expected to be rejected ---
    for kwargs in (dict(n=2, min_len=10), dict(n=10)):
        with pytest.raises(Exception):
            df.vbt.expanding_split(**kwargs)
1674
1675
def test_crossed_above(self):
    """Check `crossed_above` with and without `wait=1`, including NaN inputs.

    The first pair starts with a NaN; the second has a NaN in the middle.
    """
    # (left values, right values, expected with default wait, expected with wait=1)
    cases = [
        (
            [np.nan, 3, 2, 1, 2, 3, 4],
            [1, 2, 3, 4, 3, 2, 1],
            [False, False, False, False, False, True, False],
            [False, False, False, False, False, False, True],
        ),
        (
            [1, 2, 3, np.nan, 5, 1, 5],
            [3, 2, 1, 1, 1, 5, 1],
            [False, False, True, False, False, False, True],
            [False, False, False, False, False, False, False],
        ),
    ]
    for left_values, right_values, expected_now, expected_waited in cases:
        left = pd.Series(left_values)
        right = pd.Series(right_values)
        pd.testing.assert_series_equal(
            left.vbt.crossed_above(right),
            pd.Series(expected_now)
        )
        pd.testing.assert_series_equal(
            left.vbt.crossed_above(right, wait=1),
            pd.Series(expected_waited)
        )
1696
1697
def test_crossed_below(self):
    """Check `crossed_below` with and without `wait=1`, including NaN inputs.

    The first pair starts with a NaN; the second has a NaN in the middle.
    Both pairs exercise `crossed_below`; the second pair previously called
    `crossed_above`, so the NaN-in-the-middle case was never covered here.
    """
    sr1 = pd.Series([np.nan, 3, 2, 1, 2, 3, 4])
    sr2 = pd.Series([1, 2, 3, 4, 3, 2, 1])
    pd.testing.assert_series_equal(
        sr1.vbt.crossed_below(sr2),
        pd.Series([False, False, True, False, False, False, False])
    )
    pd.testing.assert_series_equal(
        sr1.vbt.crossed_below(sr2, wait=1),
        pd.Series([False, False, False, True, False, False, False])
    )

    sr3 = pd.Series([1, 2, 3, np.nan, 5, 1, 5])
    sr4 = pd.Series([3, 2, 1, 1, 1, 5, 1])
    # sr3 is above sr4 at position 2, but the NaN at position 3 discards that
    # state. It is above again at position 4 and drops below at position 5,
    # which is the only crossing.
    pd.testing.assert_series_equal(
        sr3.vbt.crossed_below(sr4),
        pd.Series([False, False, False, False, False, True, False])
    )
    # sr3 is back above sr4 at position 6, so the crossing at position 5 never
    # lasts long enough to be confirmed one step later.
    pd.testing.assert_series_equal(
        sr3.vbt.crossed_below(sr4, wait=1),
        pd.Series([False, False, False, False, False, False, False])
    )
1718
1719
def test_stats(self):
    """Check `stats` aggregated over columns, for one column, for one group, and unaggregated.

    Also verifies that selecting a column or group before calling `stats`
    gives the same result as passing `column`/`group_by` to `stats` itself.
    """
    metric_names = [
        'Start', 'End', 'Period', 'Count', 'Mean', 'Std', 'Min', 'Median', 'Max', 'Min Index', 'Max Index'
    ]
    stats_index = pd.Index(metric_names, dtype='object')

    # The first three metrics are the same in every expected series below.
    first_day = pd.Timestamp('2018-01-01 00:00:00')
    last_day = pd.Timestamp('2018-01-05 00:00:00')
    period = [first_day, last_day, pd.Timedelta('5 days 00:00:00')]

    # Aggregated across columns: the last two (index-valued) metrics are not expected.
    expected_mean = pd.Series(
        period + [4.0, 2.1666666666666665, 1.0531130555537456, 1.0, 2.1666666666666665, 3.3333333333333335],
        index=stats_index[:-2],
        name='agg_func_mean'
    )
    pd.testing.assert_series_equal(df.vbt.stats(), expected_mean)

    # Single column.
    expected_a = pd.Series(
        period + [
            4, 2.5, 1.2909944487358056, 1.0, 2.5, 4.0,
            first_day,
            pd.Timestamp('2018-01-04 00:00:00')
        ],
        index=stats_index,
        name='a'
    )
    pd.testing.assert_series_equal(df.vbt.stats(column='a'), expected_a)

    # Single group.
    expected_g1 = pd.Series(
        period + [
            8, 2.5, 1.1952286093343936, 1.0, 2.5, 4.0,
            first_day,
            pd.Timestamp('2018-01-02 00:00:00')
        ],
        index=stats_index,
        name='g1'
    )
    pd.testing.assert_series_equal(df.vbt.stats(column='g1', group_by=group_by), expected_g1)

    # Selecting first and asking for a column afterwards must agree.
    from_series = df['c'].vbt.stats()
    pd.testing.assert_series_equal(from_series, df.vbt.stats(column='c'))
    from_series = df['c'].vbt.stats()
    pd.testing.assert_series_equal(from_series, df.vbt.stats(column='c', group_by=False))

    from_group = df.vbt(group_by=group_by)['g2'].stats()
    pd.testing.assert_series_equal(from_group, df.vbt(group_by=group_by).stats(column='g2'))
    from_group = df.vbt(group_by=group_by)['g2'].stats()
    pd.testing.assert_series_equal(from_group, df.vbt.stats(column='g2', group_by=group_by))

    # Without aggregation: one row per column, one column per metric.
    stats_df = df.vbt.stats(agg_func=None)
    assert stats_df.shape == (3, 11)
    pd.testing.assert_index_equal(stats_df.index, df.vbt.wrapper.columns)
    pd.testing.assert_index_equal(stats_df.columns, stats_index)
1783
1784
def test_stats_mapping(self):
    """Check `stats` when the accessor is given a value-to-label `mapping`.

    With a mapping, the expected metrics after 'Start', 'End' and 'Period' are
    value counts per mapped label. The mapping may be passed to the accessor
    or through `settings`, and selecting a column or group first must agree
    with passing `column`/`group_by` to `stats`.
    """
    # Label every distinct cell value (NaN included) as 'test_<value>'.
    mapping = {}
    for value in pd.unique(df.values.flatten()):
        mapping[value] = 'test_' + str(value)

    stats_index = pd.Index(
        ['Start', 'End', 'Period'] + [
            'Value Counts: test_1.0',
            'Value Counts: test_2.0',
            'Value Counts: test_3.0',
            'Value Counts: test_4.0',
            'Value Counts: test_nan'
        ],
        dtype='object'
    )

    def mapped(**kwargs):
        # Fresh accessor carrying the mapping for every use.
        return df.vbt(mapping=mapping, **kwargs)

    # The first three metrics are the same in every expected series below.
    period = [
        pd.Timestamp('2018-01-01 00:00:00'),
        pd.Timestamp('2018-01-05 00:00:00'),
        pd.Timedelta('5 days 00:00:00')
    ]

    # Aggregated across columns.
    expected_mean = pd.Series(
        period + [1.3333333333333333, 1.3333333333333333, 0.6666666666666666, 0.6666666666666666, 1.0],
        index=stats_index,
        name='agg_func_mean'
    )
    pd.testing.assert_series_equal(mapped().stats(), expected_mean)

    # Single column.
    expected_a = pd.Series(period + [1, 1, 1, 1, 1], index=stats_index, name='a')
    pd.testing.assert_series_equal(mapped().stats(column='a'), expected_a)

    # Single group.
    expected_g1 = pd.Series(period + [2, 2, 2, 2, 2], index=stats_index, name='g1')
    pd.testing.assert_series_equal(mapped().stats(column='g1', group_by=group_by), expected_g1)

    # Mapping given to the accessor vs. given through `settings`.
    pd.testing.assert_series_equal(
        mapped().stats(),
        df.vbt.stats(settings=dict(mapping=mapping))
    )

    # Selecting first (with all keys included) vs. asking for a column afterwards.
    all_keys = dict(incl_all_keys=True)
    pd.testing.assert_series_equal(
        df['c'].vbt(mapping=mapping).stats(settings=all_keys),
        mapped().stats(column='c')
    )
    pd.testing.assert_series_equal(
        df['c'].vbt(mapping=mapping).stats(settings=all_keys),
        mapped().stats(column='c', group_by=False)
    )
    pd.testing.assert_series_equal(
        mapped(group_by=group_by)['g2'].stats(settings=all_keys),
        mapped(group_by=group_by).stats(column='g2')
    )
    pd.testing.assert_series_equal(
        mapped(group_by=group_by)['g2'].stats(settings=all_keys),
        mapped().stats(column='g2', group_by=group_by)
    )

    # Without aggregation: one row per column, one column per metric.
    stats_df = mapped().stats(agg_func=None)
    assert stats_df.shape == (3, 8)
    pd.testing.assert_index_equal(stats_df.index, df.vbt.wrapper.columns)
    pd.testing.assert_index_equal(stats_df.columns, stats_index)
1851
1852