Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
polakowo
GitHub Repository: polakowo/vectorbt
Path: blob/master/tests/test_base.py
1071 views
1
from datetime import datetime
2
3
import numpy as np
4
import pandas as pd
5
import pytest
6
from numba import njit
7
8
import vectorbt as vbt
9
from vectorbt.base import (
10
array_wrapper,
11
column_grouper,
12
combine_fns,
13
index_fns,
14
indexing,
15
reshape_fns
16
)
17
18
# ray is an optional dependency: remember whether it could be imported so that
# module setup/teardown can skip ray-specific work when it is missing.
ray_available = True
try:
    import ray
except Exception:
    # Best effort on purpose: any failure while importing ray (missing package,
    # broken installation) simply disables the ray-dependent parts.
    # ``Exception`` instead of a bare ``except`` lets KeyboardInterrupt and
    # SystemExit propagate instead of being silently swallowed.
    ray_available = False
23
24
# 86400000000000 is the number of nanoseconds in one day; the timedelta64 is
# created without an explicit unit.
day_dt = np.timedelta64(86400000000000)

# Shared input fixtures: NumPy arrays, Series and DataFrames of various shapes.

# NumPy arrays: (1,), (3,), (1, 3), (3, 1) and (3, 3)
a1 = np.array([1])
a2 = np.array([1, 2, 3])
a3 = np.array([[1, 2, 3]])
a4 = np.array([[1], [2], [3]])
a5 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Series: one with default labels, two with named index and name
sr_none = pd.Series([1])
sr1 = pd.Series(
    [1],
    index=pd.Index(['x1'], name='i1'),
    name='a1',
)
sr2 = pd.Series(
    [1, 2, 3],
    index=pd.Index(['x2', 'y2', 'z2'], name='i2'),
    name='a2',
)

# DataFrames: one with default labels, the others with named index and columns
df_none = pd.DataFrame([[1]])
df1 = pd.DataFrame(
    [[1]],
    index=pd.Index(['x3'], name='i3'),
    columns=pd.Index(['a3'], name='c3'),
)
df2 = pd.DataFrame(
    [[1], [2], [3]],
    index=pd.Index(['x4', 'y4', 'z4'], name='i4'),
    columns=pd.Index(['a4'], name='c4'),
)
df3 = pd.DataFrame(
    [[1, 2, 3]],
    index=pd.Index(['x5'], name='i5'),
    columns=pd.Index(['a5', 'b5', 'c5'], name='c5'),
)
df4 = pd.DataFrame(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    index=pd.Index(['x6', 'y6', 'z6'], name='i6'),
    columns=pd.Index(['a6', 'b6', 'c6'], name='c6'),
)

# DataFrame whose rows and columns are both two-level MultiIndexes
multi_i = pd.MultiIndex.from_arrays(
    [['x7', 'y7', 'z7'], ['x8', 'y8', 'z8']],
    names=['i7', 'i8'],
)
multi_c = pd.MultiIndex.from_arrays(
    [['a7', 'b7', 'c7'], ['a8', 'b8', 'c8']],
    names=['c7', 'c8'],
)
df5 = pd.DataFrame(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    index=multi_i,
    columns=multi_c,
)
55
56
57
# ############# Global ############# #
58
59
def setup_module():
    """Put vectorbt's global settings into a fixed state before this module's tests run.

    Enables the numba function-suffix check, makes broadcasting stack both
    index and columns, and switches caching off with empty white/black lists.
    When ray could be imported, a local-mode, single-CPU ray instance is
    started as well.
    """
    settings = vbt.settings
    settings.numba['check_func_suffix'] = True

    for axis_key in ('index_from', 'columns_from'):
        settings.broadcasting[axis_key] = 'stack'

    caching = settings.caching
    caching.enabled = False
    caching.whitelist = []
    caching.blacklist = []

    if ray_available:
        ray.init(local_mode=True, num_cpus=1)
68
69
70
def teardown_module():
    """Undo what ``setup_module`` did once all tests of this module have run.

    Shuts ray down (only if it could be imported) and resets vectorbt's
    global settings.
    """
    try:
        if ray_available:
            ray.shutdown()
    finally:
        # Reset the settings even if shutting ray down fails; otherwise the
        # values changed in setup_module would leak into later test modules.
        vbt.settings.reset()
74
75
76
# ############# column_grouper.py ############# #
77
78
79
# Eight columns on three levels, used as the grouping fixture below:
# 'first' has two distinct labels, 'second' four and 'third' eight.
_first_level = [1, 1, 1, 1, 0, 0, 0, 0]
_second_level = [3, 3, 2, 2, 1, 1, 0, 0]
_third_level = [7, 6, 5, 4, 3, 2, 1, 0]
grouped_columns = pd.MultiIndex.from_arrays(
    [_first_level, _second_level, _third_level],
    names=['first', 'second', 'third'],
)
84
85
86
class TestColumnGrouper:
    """Tests for the module-level helpers and the ``ColumnGrouper`` class of ``column_grouper``."""

    def test_group_by_to_index(self):
        """``group_by_to_index`` handles flags, level positions/names and explicit labels."""
        to_index = column_grouper.group_by_to_index
        first_level = pd.Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first')
        first_two_levels = pd.MultiIndex.from_tuples([
            (1, 3),
            (1, 3),
            (1, 2),
            (1, 2),
            (0, 1),
            (0, 1),
            (0, 0),
            (0, 0)
        ], names=['first', 'second'])
        custom_labels = [3, 2, 1, 1, 1, 0, 0, 0]

        # Boolean flags and None
        assert not to_index(grouped_columns, group_by=False)
        assert to_index(grouped_columns, group_by=None) is None
        pd.testing.assert_index_equal(
            to_index(grouped_columns, group_by=True),
            pd.Index(['group'] * len(grouped_columns))
        )

        # A single level, addressed by position and then by name
        for level in (0, 'first'):
            pd.testing.assert_index_equal(
                to_index(grouped_columns, group_by=level),
                first_level
            )

        # Several levels, addressed by positions and then by names
        for levels in ([0, 1], ['first', 'second']):
            pd.testing.assert_index_equal(
                to_index(grouped_columns, group_by=levels),
                first_two_levels
            )

        # Explicit labels, as an array and as a named index
        pd.testing.assert_index_equal(
            to_index(grouped_columns, group_by=np.array(custom_labels)),
            pd.Index(custom_labels, dtype='int64')
        )
        pd.testing.assert_index_equal(
            to_index(grouped_columns, group_by=pd.Index(custom_labels, name='fourth')),
            pd.Index(custom_labels, dtype='int64', name='fourth')
        )

    def test_get_groups_and_index(self):
        """``get_groups_and_index`` returns per-column group numbers and the group index."""
        groups, index = column_grouper.get_groups_and_index(grouped_columns, group_by=None)
        np.testing.assert_array_equal(groups, np.array([0, 1, 2, 3, 4, 5, 6, 7]))
        pd.testing.assert_index_equal(index, grouped_columns)

        groups, index = column_grouper.get_groups_and_index(grouped_columns, group_by=0)
        np.testing.assert_array_equal(groups, np.array([0, 0, 0, 0, 1, 1, 1, 1]))
        pd.testing.assert_index_equal(index, pd.Index([1, 0], dtype='int64', name='first'))

        groups, index = column_grouper.get_groups_and_index(grouped_columns, group_by=[0, 1])
        expected_index = pd.MultiIndex.from_tuples([
            (1, 3),
            (1, 2),
            (0, 1),
            (0, 0)
        ], names=['first', 'second'])
        np.testing.assert_array_equal(groups, np.array([0, 0, 1, 1, 2, 2, 3, 3]))
        pd.testing.assert_index_equal(index, expected_index)

    def test_get_group_lens_nb(self):
        """``get_group_lens_nb`` counts columns per group and rejects the two orderings below."""
        group_lens = column_grouper.get_group_lens_nb

        valid_cases = [
            (np.array([0, 0, 0, 0, 1, 1, 1, 1]), np.array([4, 4])),
            (np.array([0, 1]), np.array([1, 1])),
            (np.array([0, 0]), np.array([2])),
            (np.array([0]), np.array([1])),
            (np.array([]), np.array([])),
        ]
        for groups, expected_lens in valid_cases:
            np.testing.assert_array_equal(group_lens(groups), expected_lens)

        # Descending and interleaved group numbers are expected to raise
        for bad_groups in (np.array([1, 1, 0, 0]), np.array([0, 1, 0, 1])):
            with pytest.raises(Exception):
                group_lens(bad_groups)

    def test_is_grouped(self):
        """Truth table of ``is_grouped`` for a grouped and an ungrouped grouper."""
        grouped = column_grouper.ColumnGrouper(grouped_columns, group_by=0)
        ungrouped = column_grouper.ColumnGrouper(grouped_columns)
        relabeled = grouped_columns.get_level_values(0) + 1  # only labels

        assert grouped.is_grouped()
        assert grouped.is_grouped(group_by=True)
        assert grouped.is_grouped(group_by=1)
        assert not grouped.is_grouped(group_by=False)

        assert not ungrouped.is_grouped()
        assert ungrouped.is_grouped(group_by=0)
        assert ungrouped.is_grouped(group_by=True)
        assert not ungrouped.is_grouped(group_by=False)

        assert grouped.is_grouped(group_by=relabeled)

    def test_is_grouping_enabled(self):
        """Truth table of ``is_grouping_enabled`` for a grouped and an ungrouped grouper."""
        grouped = column_grouper.ColumnGrouper(grouped_columns, group_by=0)
        ungrouped = column_grouper.ColumnGrouper(grouped_columns)
        relabeled = grouped_columns.get_level_values(0) + 1  # only labels

        assert not grouped.is_grouping_enabled()
        assert not grouped.is_grouping_enabled(group_by=True)
        assert not grouped.is_grouping_enabled(group_by=1)
        assert not grouped.is_grouping_enabled(group_by=False)

        assert not ungrouped.is_grouping_enabled()
        assert ungrouped.is_grouping_enabled(group_by=0)
        assert ungrouped.is_grouping_enabled(group_by=True)
        assert not ungrouped.is_grouping_enabled(group_by=False)

        assert not grouped.is_grouping_enabled(group_by=relabeled)

    def test_is_grouping_disabled(self):
        """Truth table of ``is_grouping_disabled`` for a grouped and an ungrouped grouper."""
        grouped = column_grouper.ColumnGrouper(grouped_columns, group_by=0)
        ungrouped = column_grouper.ColumnGrouper(grouped_columns)
        relabeled = grouped_columns.get_level_values(0) + 1  # only labels

        assert not grouped.is_grouping_disabled()
        assert not grouped.is_grouping_disabled(group_by=True)
        assert not grouped.is_grouping_disabled(group_by=1)
        assert grouped.is_grouping_disabled(group_by=False)

        assert not ungrouped.is_grouping_disabled()
        assert not ungrouped.is_grouping_disabled(group_by=0)
        assert not ungrouped.is_grouping_disabled(group_by=True)
        assert not ungrouped.is_grouping_disabled(group_by=False)

        assert not grouped.is_grouping_disabled(group_by=relabeled)

    def test_is_grouping_modified(self):
        """Truth table of ``is_grouping_modified`` for a grouped and an ungrouped grouper."""
        grouped = column_grouper.ColumnGrouper(grouped_columns, group_by=0)
        ungrouped = column_grouper.ColumnGrouper(grouped_columns)
        relabeled = grouped_columns.get_level_values(0) + 1  # only labels

        assert not grouped.is_grouping_modified()
        assert grouped.is_grouping_modified(group_by=True)
        assert grouped.is_grouping_modified(group_by=1)
        assert grouped.is_grouping_modified(group_by=False)

        assert not ungrouped.is_grouping_modified()
        assert ungrouped.is_grouping_modified(group_by=0)
        assert ungrouped.is_grouping_modified(group_by=True)
        assert not ungrouped.is_grouping_modified(group_by=False)

        assert not grouped.is_grouping_modified(group_by=relabeled)

    def test_is_grouping_changed(self):
        """Truth table of ``is_grouping_changed`` for a grouped and an ungrouped grouper."""
        grouped = column_grouper.ColumnGrouper(grouped_columns, group_by=0)
        ungrouped = column_grouper.ColumnGrouper(grouped_columns)
        relabeled = grouped_columns.get_level_values(0) + 1  # only labels

        assert not grouped.is_grouping_changed()
        assert grouped.is_grouping_changed(group_by=True)
        assert grouped.is_grouping_changed(group_by=1)
        assert grouped.is_grouping_changed(group_by=False)

        assert not ungrouped.is_grouping_changed()
        assert ungrouped.is_grouping_changed(group_by=0)
        assert ungrouped.is_grouping_changed(group_by=True)
        assert not ungrouped.is_grouping_changed(group_by=False)

        assert grouped.is_grouping_changed(group_by=relabeled)

    def test_is_group_count_changed(self):
        """Truth table of ``is_group_count_changed`` for a grouped and an ungrouped grouper."""
        grouped = column_grouper.ColumnGrouper(grouped_columns, group_by=0)
        ungrouped = column_grouper.ColumnGrouper(grouped_columns)
        relabeled = grouped_columns.get_level_values(0) + 1  # only labels

        assert not grouped.is_group_count_changed()
        assert grouped.is_group_count_changed(group_by=True)
        assert grouped.is_group_count_changed(group_by=1)
        assert grouped.is_group_count_changed(group_by=False)

        assert not ungrouped.is_group_count_changed()
        assert ungrouped.is_group_count_changed(group_by=0)
        assert ungrouped.is_group_count_changed(group_by=True)
        assert not ungrouped.is_group_count_changed(group_by=False)

        assert not grouped.is_group_count_changed(group_by=relabeled)

    def test_check_group_by(self):
        """``check_group_by`` raises only when the matching ``allow_*`` flag is off."""
        make_grouper = column_grouper.ColumnGrouper

        # Enabling grouping on an ungrouped instance
        can_enable = make_grouper(grouped_columns, group_by=None, allow_enable=True)
        can_enable.check_group_by(group_by=0)
        cannot_enable = make_grouper(grouped_columns, group_by=None, allow_enable=False)
        with pytest.raises(Exception):
            cannot_enable.check_group_by(group_by=0)

        # Disabling grouping on a grouped instance
        can_disable = make_grouper(grouped_columns, group_by=0, allow_disable=True)
        can_disable.check_group_by(group_by=False)
        cannot_disable = make_grouper(grouped_columns, group_by=0, allow_disable=False)
        with pytest.raises(Exception):
            cannot_disable.check_group_by(group_by=False)

        # Modifying the groups of a grouped instance
        can_modify = make_grouper(grouped_columns, group_by=0, allow_modify=True)
        can_modify.check_group_by(group_by=1)
        cannot_modify = make_grouper(grouped_columns, group_by=0, allow_modify=False)
        # Same grouping with different labels is accepted
        cannot_modify.check_group_by(group_by=np.array([2, 2, 2, 2, 3, 3, 3, 3]))
        with pytest.raises(Exception):
            make_grouper(grouped_columns, group_by=0, allow_modify=False).check_group_by(group_by=1)

    def test_resolve_group_by(self):
        """``resolve_group_by`` uses the instance's grouping unless one is passed explicitly."""
        make_grouper = column_grouper.ColumnGrouper
        first_level = pd.Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first')
        second_level = pd.Index([3, 3, 2, 2, 1, 1, 0, 0], dtype='int64', name='second')

        # Instance created without grouping
        assert make_grouper(grouped_columns, group_by=None).resolve_group_by() is None  # default
        pd.testing.assert_index_equal(
            make_grouper(grouped_columns, group_by=None).resolve_group_by(group_by=0),  # overrides
            first_level
        )

        # Instance created with grouping by the first level
        pd.testing.assert_index_equal(
            make_grouper(grouped_columns, group_by=0).resolve_group_by(),  # default
            first_level
        )
        pd.testing.assert_index_equal(
            make_grouper(grouped_columns, group_by=0).resolve_group_by(group_by=1),  # overrides
            second_level
        )

    def test_get_groups(self):
        """``get_groups`` numbers each column, or each group when ``group_by`` is given."""
        grouper = column_grouper.ColumnGrouper(grouped_columns)
        np.testing.assert_array_equal(
            grouper.get_groups(),
            np.array([0, 1, 2, 3, 4, 5, 6, 7])
        )
        np.testing.assert_array_equal(
            grouper.get_groups(group_by=0),
            np.array([0, 0, 0, 0, 1, 1, 1, 1])
        )

    def test_get_columns(self):
        """``get_columns`` returns the columns, or the group index when ``group_by`` is given."""
        grouper = column_grouper.ColumnGrouper(grouped_columns)
        pd.testing.assert_index_equal(grouper.get_columns(), grouper.columns)
        pd.testing.assert_index_equal(
            grouper.get_columns(group_by=0),
            pd.Index([1, 0], dtype='int64', name='first')
        )

    def test_get_group_lens(self):
        """``get_group_lens`` is all ones without grouping and the group sizes with it."""
        grouper = column_grouper.ColumnGrouper(grouped_columns)
        np.testing.assert_array_equal(
            grouper.get_group_lens(),
            np.array([1, 1, 1, 1, 1, 1, 1, 1])
        )
        np.testing.assert_array_equal(
            grouper.get_group_lens(group_by=0),
            np.array([4, 4])
        )

    def test_get_group_start_idxs(self):
        """``get_group_start_idxs`` gives the first column position of every group."""
        grouper = column_grouper.ColumnGrouper(grouped_columns)
        np.testing.assert_array_equal(
            grouper.get_group_start_idxs(),
            np.array([0, 1, 2, 3, 4, 5, 6, 7])
        )
        np.testing.assert_array_equal(
            grouper.get_group_start_idxs(group_by=0),
            np.array([0, 4])
        )

    def test_get_group_end_idxs(self):
        """``get_group_end_idxs`` gives the exclusive end column position of every group."""
        grouper = column_grouper.ColumnGrouper(grouped_columns)
        np.testing.assert_array_equal(
            grouper.get_group_end_idxs(),
            np.array([1, 2, 3, 4, 5, 6, 7, 8])
        )
        np.testing.assert_array_equal(
            grouper.get_group_end_idxs(group_by=0),
            np.array([4, 8])
        )

    def test_eq(self):
        """Groupers compare equal only when columns, grouping and all flags match."""
        make_grouper = column_grouper.ColumnGrouper
        plain = make_grouper(grouped_columns)

        # Equal configurations
        assert plain == make_grouper(grouped_columns)
        assert make_grouper(grouped_columns, group_by=0) == make_grouper(grouped_columns, group_by=0)

        # Different type, grouping or columns
        assert plain != 0
        assert plain != make_grouper(grouped_columns, group_by=0)
        assert plain != make_grouper(pd.Index([0]))

        # Any differing permission flag
        assert plain != make_grouper(grouped_columns, allow_enable=False)
        assert plain != make_grouper(grouped_columns, allow_disable=False)
        assert plain != make_grouper(grouped_columns, allow_modify=False)
346
347
348
# ############# array_wrapper.py ############# #
349
350
351
# Wrappers built directly from the pandas fixtures
sr2_wrapper = array_wrapper.ArrayWrapper.from_obj(sr2)
df2_wrapper = array_wrapper.ArrayWrapper.from_obj(df2)
df4_wrapper = array_wrapper.ArrayWrapper.from_obj(df4)

# Same wrappers with column-only selection switched on
sr2_wrapper_co = sr2_wrapper.replace(column_only_select=True)
df4_wrapper_co = df4_wrapper.replace(column_only_select=True)

# Grouped wrappers: sr2 has the single group 'g1',
# df4 puts its first two columns into 'g1' and the last one into 'g2'
sr2_grouped_wrapper = sr2_wrapper.replace(
    group_by=np.array(['g1']),
    group_select=True,
)
df4_grouped_wrapper = df4_wrapper.replace(
    group_by=np.array(['g1', 'g1', 'g2']),
    group_select=True,
)

# Grouped wrappers with column-only selection switched on
sr2_grouped_wrapper_co = sr2_grouped_wrapper.replace(
    column_only_select=True,
    group_select=True,
)
df4_grouped_wrapper_co = df4_grouped_wrapper.replace(
    column_only_select=True,
    group_select=True,
)
363
364
365
class TestArrayWrapper:
366
def test_config(self, tmp_path):
    """Wrappers survive a dumps/loads round trip and a save/load round trip via a file."""
    wrapper_cls = array_wrapper.ArrayWrapper

    # In-memory round trip
    for wrapper in (sr2_wrapper, sr2_wrapper_co, sr2_grouped_wrapper, sr2_grouped_wrapper_co):
        assert wrapper_cls.loads(wrapper.dumps()) == wrapper

    # Round trip through a file in pytest's temporary directory
    target = tmp_path / 'sr2_grouped_wrapper_co'
    sr2_grouped_wrapper_co.save(target)
    assert wrapper_cls.load(target) == sr2_grouped_wrapper_co
373
374
def test_indexing_func_meta(self):
    """Check the index metadata that ``indexing_func_meta`` returns after its first element.

    Judging by the expected values below, the three checked items are the
    selected row positions, the selected positions along the (possibly
    grouped) column axis, and the positions of the underlying columns.
    """

    def meta(wrapper, indexer):
        # Everything after the first element of the returned tuple
        return wrapper.indexing_func_meta(indexer)[1:]

    same = np.testing.assert_array_equal

    # not grouped
    row_idxs, sel_idxs, col_idxs = meta(sr2_wrapper, lambda x: x.iloc[:2])
    same(row_idxs, np.array([0, 1]))
    assert sel_idxs == 0
    assert col_idxs == 0

    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper, lambda x: x.iloc[0, :2])
    assert row_idxs == 0
    same(sel_idxs, np.array([0, 1]))
    same(col_idxs, np.array([0, 1]))

    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper, lambda x: x.iloc[:2, 0])
    same(row_idxs, np.array([0, 1]))
    assert sel_idxs == 0
    assert col_idxs == 0

    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper, lambda x: x.iloc[:2, [0]])
    same(row_idxs, np.array([0, 1]))
    same(sel_idxs, np.array([0]))
    same(col_idxs, np.array([0]))

    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper, lambda x: x.iloc[:2, :2])
    same(row_idxs, np.array([0, 1]))
    same(sel_idxs, np.array([0, 1]))
    same(col_idxs, np.array([0, 1]))

    with pytest.raises(Exception):
        _ = meta(df4_wrapper, lambda x: x.iloc[0, 0])
    with pytest.raises(Exception):
        _ = meta(df4_wrapper, lambda x: x.iloc[[0], 0])

    # not grouped, column only
    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper_co, lambda x: x.iloc[0])
    same(row_idxs, np.array([0, 1, 2]))
    assert sel_idxs == 0
    assert col_idxs == 0

    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper_co, lambda x: x.iloc[[0]])
    same(row_idxs, np.array([0, 1, 2]))
    same(sel_idxs, np.array([0]))
    same(col_idxs, np.array([0]))

    row_idxs, sel_idxs, col_idxs = meta(df4_wrapper_co, lambda x: x.iloc[:2])
    same(row_idxs, np.array([0, 1, 2]))
    same(sel_idxs, np.array([0, 1]))
    same(col_idxs, np.array([0, 1]))

    with pytest.raises(Exception):
        _ = meta(sr2_wrapper_co, lambda x: x.iloc[:2])
    with pytest.raises(Exception):
        _ = meta(df4_wrapper_co, lambda x: x.iloc[:, :2])

    # grouped
    row_idxs, sel_idxs, col_idxs = meta(sr2_grouped_wrapper, lambda x: x.iloc[:2])
    same(row_idxs, np.array([0, 1]))
    assert sel_idxs == 0
    assert col_idxs == 0

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper, lambda x: x.iloc[:2, 0])
    same(row_idxs, np.array([0, 1]))
    assert sel_idxs == 0
    same(col_idxs, np.array([0, 1]))

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper, lambda x: x.iloc[:2, 1])
    same(row_idxs, np.array([0, 1]))
    assert sel_idxs == 1
    assert col_idxs == 2

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper, lambda x: x.iloc[:2, [1]])
    same(row_idxs, np.array([0, 1]))
    same(sel_idxs, np.array([1]))
    same(col_idxs, np.array([2]))

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper, lambda x: x.iloc[:2, :2])
    same(row_idxs, np.array([0, 1]))
    same(sel_idxs, np.array([0, 1]))
    same(col_idxs, np.array([0, 1, 2]))

    with pytest.raises(Exception):
        _ = meta(df4_grouped_wrapper, lambda x: x.iloc[0, :2])

    # grouped, column only
    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper_co, lambda x: x.iloc[0])
    same(row_idxs, np.array([0, 1, 2]))
    assert sel_idxs == 0
    same(col_idxs, np.array([0, 1]))

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper_co, lambda x: x.iloc[1])
    same(row_idxs, np.array([0, 1, 2]))
    assert sel_idxs == 1
    assert col_idxs == 2

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper_co, lambda x: x.iloc[[1]])
    same(row_idxs, np.array([0, 1, 2]))
    same(sel_idxs, np.array([1]))
    same(col_idxs, np.array([2]))

    row_idxs, sel_idxs, col_idxs = meta(df4_grouped_wrapper_co, lambda x: x.iloc[:2])
    same(row_idxs, np.array([0, 1, 2]))
    same(sel_idxs, np.array([0, 1]))
    same(col_idxs, np.array([0, 1, 2]))
460
461
def test_indexing(self):
    """``iloc`` on a wrapper yields a wrapper with the expected index, columns and dimensions."""

    def labels(values, name=None):
        # Object-dtype index with an optional name
        return pd.Index(list(values), dtype='object', name=name)

    def check(sliced, index, columns, ndim, grouped_ndim=None, group_by=None):
        # Compare one sliced wrapper against its expected metadata;
        # the grouping checks only run when expectations are given.
        pd.testing.assert_index_equal(sliced.index, index)
        pd.testing.assert_index_equal(sliced.columns, columns)
        assert sliced.ndim == ndim
        if grouped_ndim is not None:
            assert sliced.grouped_ndim == grouped_ndim
        if group_by is not None:
            pd.testing.assert_index_equal(sliced.grouper.group_by, labels(group_by))

    # not grouped
    check(
        sr2_wrapper.iloc[:2],
        index=labels(['x2', 'y2'], 'i2'),
        columns=labels(['a2']),
        ndim=1)
    check(
        df4_wrapper.iloc[0, :2],
        index=labels(['a6', 'b6'], 'c6'),
        columns=labels(['x6'], 'i6'),
        ndim=1)
    check(
        df4_wrapper.iloc[:2, 0],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['a6'], 'c6'),
        ndim=1)
    check(
        df4_wrapper.iloc[:2, [0]],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['a6'], 'c6'),
        ndim=2)
    check(
        df4_wrapper.iloc[:2, :2],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['a6', 'b6'], 'c6'),
        ndim=2)

    # not grouped, column only
    check(
        df4_wrapper_co.iloc[0],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['a6'], 'c6'),
        ndim=1)
    check(
        df4_wrapper_co.iloc[[0]],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['a6'], 'c6'),
        ndim=2)
    check(
        df4_wrapper_co.iloc[:2],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['a6', 'b6'], 'c6'),
        ndim=2)

    # grouped
    check(
        sr2_grouped_wrapper.iloc[:2],
        index=labels(['x2', 'y2'], 'i2'),
        columns=labels(['a2']),
        ndim=1,
        grouped_ndim=1,
        group_by=['g1'])
    check(
        df4_grouped_wrapper.iloc[:2, 0],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['a6', 'b6'], 'c6'),
        ndim=2,
        grouped_ndim=1,
        group_by=['g1', 'g1'])
    check(
        df4_grouped_wrapper.iloc[:2, 1],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['c6'], 'c6'),
        ndim=1,
        grouped_ndim=1,
        group_by=['g2'])
    check(
        df4_grouped_wrapper.iloc[:2, [1]],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['c6'], 'c6'),
        ndim=2,
        grouped_ndim=2,
        group_by=['g2'])
    check(
        df4_grouped_wrapper.iloc[:2, :2],
        index=labels(['x6', 'y6'], 'i6'),
        columns=labels(['a6', 'b6', 'c6'], 'c6'),
        ndim=2,
        grouped_ndim=2,
        group_by=['g1', 'g1', 'g2'])

    # grouped, column only
    check(
        df4_grouped_wrapper_co.iloc[0],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['a6', 'b6'], 'c6'),
        ndim=2,
        grouped_ndim=1,
        group_by=['g1', 'g1'])
    check(
        df4_grouped_wrapper_co.iloc[1],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['c6'], 'c6'),
        ndim=1,
        grouped_ndim=1,
        group_by=['g2'])
    check(
        df4_grouped_wrapper_co.iloc[[1]],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['c6'], 'c6'),
        ndim=2,
        grouped_ndim=2,
        group_by=['g2'])
    check(
        df4_grouped_wrapper_co.iloc[:2],
        index=labels(['x6', 'y6', 'z6'], 'i6'),
        columns=labels(['a6', 'b6', 'c6'], 'c6'),
        ndim=2,
        grouped_ndim=2,
        group_by=['g1', 'g1', 'g2'])
624
625
def test_from_obj(self):
    """``from_obj`` reproduces the module-level wrappers, with and without column-only selection."""
    from_obj = array_wrapper.ArrayWrapper.from_obj

    # Default options
    assert from_obj(sr2) == sr2_wrapper
    assert from_obj(df4) == df4_wrapper

    # Column-only selection passed through as a keyword
    assert from_obj(sr2, column_only_select=True) == sr2_wrapper_co
    assert from_obj(df4, column_only_select=True) == df4_wrapper_co
630
631
def test_from_shape(self):
    """``from_shape`` builds a wrapper with range-based index and columns."""
    wrapper_cls = array_wrapper.ArrayWrapper

    # 1-dim shape: three rows, a single column, ndim of 1
    expected_1d = wrapper_cls(
        pd.RangeIndex(start=0, stop=3, step=1),
        pd.RangeIndex(start=0, stop=1, step=1),
        1
    )
    assert wrapper_cls.from_shape((3,)) == expected_1d

    # 2-dim shape: same as wrapping a default-labelled 3x3 frame
    expected_2d = wrapper_cls.from_obj(pd.DataFrame(np.empty((3, 3))))
    assert wrapper_cls.from_shape((3, 3)) == expected_2d
637
638
def test_columns(self):
    """``columns`` always holds the original columns; ``get_columns`` holds the group labels."""
    same_index = pd.testing.assert_index_equal

    same_index(df4_wrapper.columns, df4.columns)
    same_index(df4_grouped_wrapper.columns, df4.columns)

    group_labels = pd.Index(['g1', 'g2'], dtype='object')
    same_index(df4_grouped_wrapper.get_columns(), group_labels)
642
643
def test_name(self):
    """``name`` is the series name (None for frames); ``get_name`` is the grouped counterpart."""
    # Plain wrappers
    assert sr2_wrapper.name == 'a2'
    assert df4_wrapper.name is None
    unnamed_wrapper = array_wrapper.ArrayWrapper.from_obj(pd.Series([0]))
    assert unnamed_wrapper.name is None

    # Grouped series wrapper
    assert sr2_grouped_wrapper.name == 'a2'
    assert sr2_grouped_wrapper.get_name() == 'g1'

    # Grouped frame wrapper
    assert df4_grouped_wrapper.name is None
    assert df4_grouped_wrapper.get_name() is None
651
652
def test_ndim(self):
    """``ndim`` counts dimensions of the columns, ``get_ndim`` those of the groups."""
    # Plain wrappers
    assert sr2_wrapper.ndim == 1
    assert df4_wrapper.ndim == 2

    # Grouped wrappers
    assert sr2_grouped_wrapper.ndim == 1
    assert sr2_grouped_wrapper.get_ndim() == 1
    assert df4_grouped_wrapper.ndim == 2
    assert df4_grouped_wrapper.get_ndim() == 2

    # Selecting one group: 'g1' holds two columns, 'g2' a single one
    first_group = df4_grouped_wrapper['g1']
    assert first_group.ndim == 2
    assert first_group.get_ndim() == 1
    second_group = df4_grouped_wrapper['g2']
    assert second_group.ndim == 1
    assert second_group.get_ndim() == 1
663
664
def test_shape(self):
    """``shape`` is the column-level shape, ``get_shape`` the group-level shape."""
    series_shape = (3,)
    frame_shape = (3, 3)

    # Plain wrappers
    assert sr2_wrapper.shape == series_shape
    assert df4_wrapper.shape == frame_shape

    # Grouped wrappers: df4's three columns collapse into two groups
    assert sr2_grouped_wrapper.shape == series_shape
    assert sr2_grouped_wrapper.get_shape() == series_shape
    assert df4_grouped_wrapper.shape == frame_shape
    assert df4_grouped_wrapper.get_shape() == (3, 2)
671
672
def test_shape_2d(self):
    """``shape_2d`` and ``get_shape_2d`` always report two dimensions."""
    series_shape_2d = (3, 1)
    frame_shape_2d = (3, 3)

    # Plain wrappers
    assert sr2_wrapper.shape_2d == series_shape_2d
    assert df4_wrapper.shape_2d == frame_shape_2d

    # Grouped wrappers: df4's three columns collapse into two groups
    assert sr2_grouped_wrapper.shape_2d == series_shape_2d
    assert sr2_grouped_wrapper.get_shape_2d() == series_shape_2d
    assert df4_grouped_wrapper.shape_2d == frame_shape_2d
    assert df4_grouped_wrapper.get_shape_2d() == (3, 2)
679
680
def test_freq(self):
    """``freq`` is None for a label index, else taken from the argument or the datetime index."""
    first_three_days = [
        datetime(2020, 1, 1),
        datetime(2020, 1, 2),
        datetime(2020, 1, 3)
    ]

    # Non-datetime index and no explicit frequency
    assert sr2_wrapper.freq is None

    # Explicit frequency
    assert sr2_wrapper.replace(freq='1D').freq == day_dt

    # Datetime index created with a frequency
    with_index_freq = sr2_wrapper.replace(index=pd.DatetimeIndex(first_three_days, freq='1D'))
    assert with_index_freq.freq == day_dt

    # Index of daily datetimes created without a frequency
    without_index_freq = sr2_wrapper.replace(index=pd.Index(first_three_days))
    assert without_index_freq.freq == day_dt
693
694
def test_to_timedelta(self):
    """Check `to_timedelta` on a series and on the single-column frame made from it."""
    sr = pd.Series([1, 2, np.nan], index=['x', 'y', 'z'], name='name')

    # Series input: with a one-day frequency, values become day multiples, NaN becomes NaT
    sr_wrapper = array_wrapper.ArrayWrapper.from_obj(sr, freq='1 days')
    actual_sr = sr_wrapper.to_timedelta(sr)
    expected_sr = pd.Series(
        np.array([86400000000000, 172800000000000, 'NaT'], dtype='timedelta64[ns]'),
        index=sr.index,
        name=sr.name
    )
    pd.testing.assert_series_equal(actual_sr, expected_sr)

    # Frame input: same values, wrapped as a frame
    df = sr.to_frame()
    df_wrapper = array_wrapper.ArrayWrapper.from_obj(df, freq='1 days')
    actual_df = df_wrapper.to_timedelta(df)
    expected_df = pd.DataFrame(
        np.array([86400000000000, 172800000000000, 'NaT'], dtype='timedelta64[ns]'),
        index=df.index,
        columns=df.columns
    )
    pd.testing.assert_frame_equal(actual_df, expected_df)
713
714
def test_wrap(self):
    """Check `ArrayWrapper.wrap` for 1-dim and 2-dim wrappers and its keyword options."""
    ArrayWrapper = array_wrapper.ArrayWrapper
    same_series = pd.testing.assert_series_equal
    same_frame = pd.testing.assert_frame_equal

    # Column label 0 (marked "empty" in the original test) gives an unnamed series
    same_series(
        ArrayWrapper(index=sr1.index, columns=[0], ndim=1).wrap(a1),
        pd.Series(a1, index=sr1.index, name=None)
    )

    # Index and name taken from sr1
    same_series(
        ArrayWrapper(index=sr1.index, columns=[sr1.name], ndim=1).wrap(a1),
        pd.Series(a1, index=sr1.index, name=sr1.name)
    )
    same_frame(
        ArrayWrapper(index=sr1.index, columns=[sr1.name], ndim=2).wrap(a1),
        pd.DataFrame(a1, index=sr1.index, columns=[sr1.name])
    )

    # Index and name taken from sr2
    same_series(
        ArrayWrapper(index=sr2.index, columns=[sr2.name], ndim=1).wrap(a2),
        pd.Series(a2, index=sr2.index, name=sr2.name)
    )
    same_frame(
        ArrayWrapper(index=sr2.index, columns=[sr2.name], ndim=2).wrap(a2),
        pd.DataFrame(a2, index=sr2.index, columns=[sr2.name])
    )

    # Index and columns taken from df2
    same_series(
        ArrayWrapper(index=df2.index, columns=df2.columns, ndim=1).wrap(a2),
        pd.Series(a2, index=df2.index, name=df2.columns[0])
    )
    same_frame(
        ArrayWrapper(index=df2.index, columns=df2.columns, ndim=2).wrap(a2),
        pd.DataFrame(a2, index=df2.index, columns=df2.columns)
    )

    # An index passed to `wrap` replaces the wrapper's own index
    same_frame(
        ArrayWrapper.from_obj(df2).wrap(a2, index=df4.index),
        pd.DataFrame(a2, index=df4.index, columns=df2.columns)
    )

    # `fillna` replaces missing values
    with_gaps = np.array([[0, 0, np.nan], [1, np.nan, 1], [2, 2, np.nan]])
    same_frame(
        ArrayWrapper(index=df4.index, columns=df4.columns, ndim=2).wrap(with_gaps, fillna=-1),
        pd.DataFrame(
            [[0., 0., -1.], [1., -1., 1.], [2., 2., -1.]],
            index=df4.index,
            columns=df4.columns
        )
    )

    # `to_index` turns integer positions into index labels
    same_frame(
        ArrayWrapper(index=df4.index, columns=df4.columns, ndim=2).wrap(
            np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]),
            to_index=True
        ),
        pd.DataFrame(
            [[label] * 3 for label in ('x6', 'y6', 'z6')],
            index=df4.index,
            columns=df4.columns
        )
    )

    # `to_timedelta` turns counts into multiples of the wrapper's frequency
    same_frame(
        ArrayWrapper(index=df4.index, columns=df4.columns, ndim=2, freq='d').wrap(
            np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]),
            to_timedelta=True
        ),
        pd.DataFrame(
            [[pd.Timedelta(days=n)] * 3 for n in range(3)],
            index=df4.index,
            columns=df4.columns
        )
    )
780
781
def test_wrap_reduced(self):
    """Check `wrap_reduced` for scalar, per-column and array-per-column results."""
    same_series = pd.testing.assert_series_equal
    same_frame = pd.testing.assert_frame_equal

    # Series wrapper, single value
    assert sr2_wrapper.wrap_reduced(0) == 0
    # A one-element array (result of a computation on 2d) collapses to a scalar
    assert sr2_wrapper.wrap_reduced(np.array([0])) == 0

    # Series wrapper, array result
    same_series(
        sr2_wrapper.wrap_reduced(np.array([0, 1])),
        pd.Series(np.array([0, 1]), name=sr2.name)
    )
    same_series(
        sr2_wrapper.wrap_reduced(np.array([0, 1]), name_or_index=['x', 'y']),
        pd.Series(np.array([0, 1]), index=['x', 'y'], name=sr2.name)
    )
    same_series(
        sr2_wrapper.wrap_reduced(np.array([0, 1]), name_or_index=['x', 'y'], columns=[0]),
        pd.Series(np.array([0, 1]), index=['x', 'y'], name=None)
    )

    # Frame wrappers, single value
    assert df2_wrapper.wrap_reduced(0) == 0
    assert df4_wrapper.wrap_reduced(0) == 0

    # Frame wrapper, one value per column
    same_series(
        df4_wrapper.wrap_reduced(np.array([0, 1, 2]), name_or_index='test'),
        pd.Series(np.array([0, 1, 2]), index=df4.columns, name='test')
    )
    same_series(
        df4_wrapper.wrap_reduced(np.array([0, 1, 2]), columns=['m', 'n', 'l'], name_or_index='test'),
        pd.Series(np.array([0, 1, 2]), index=['m', 'n', 'l'], name='test')
    )

    # Frame wrapper, one array per column
    same_frame(
        df4_wrapper.wrap_reduced(np.array([[0, 1, 2], [3, 4, 5]]), name_or_index=['x', 'y']),
        pd.DataFrame(np.array([[0, 1, 2], [3, 4, 5]]), index=['x', 'y'], columns=df4.columns)
    )
    same_frame(
        df4_wrapper.wrap_reduced(
            np.array([[0, 1, 2], [3, 4, 5]]),
            name_or_index=['x', 'y'],
            columns=['m', 'n', 'l']
        ),
        pd.DataFrame(np.array([[0, 1, 2], [3, 4, 5]]), index=['x', 'y'], columns=['m', 'n', 'l'])
    )
821
822
def test_grouped_wrapping(self):
    """Check `wrap` and `wrap_reduced` on a grouped wrapper, with and without `group_by=False`."""
    co = df4_grouped_wrapper_co
    same_series = pd.testing.assert_series_equal
    same_frame = pd.testing.assert_frame_equal

    # Each factory returns a fresh array so that inputs and expectations never share data
    def flat():
        return np.array([1, 2, 3])

    def stacked():
        return np.array([[1], [2], [3]])

    def two_wide():
        return np.array([[1, 2], [3, 4], [5, 6]])

    def three_wide():
        return np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    def labels(*names):
        return pd.Index(list(names), dtype='object')

    # --- whole wrapper ---
    same_frame(
        co.wrap(two_wide()),
        pd.DataFrame(two_wide(), index=df4.index, columns=labels('g1', 'g2'))
    )
    same_series(
        co.wrap_reduced(np.array([1, 2])),
        pd.Series(np.array([1, 2]), index=labels('g1', 'g2'))
    )
    same_frame(
        co.wrap(three_wide(), group_by=False),
        pd.DataFrame(three_wide(), index=df4.index, columns=df4.columns)
    )
    same_series(
        co.wrap_reduced(flat(), group_by=False),
        pd.Series(flat(), index=df4.columns)
    )

    # --- first group selected with a scalar position ---
    same_series(
        co.iloc[0].wrap(flat()),
        pd.Series(flat(), index=df4.index, name='g1')
    )
    assert co.iloc[0].wrap_reduced(np.array([1])) == 1
    same_series(
        co.iloc[0].wrap(stacked()),
        pd.Series(flat(), index=df4.index, name='g1')
    )
    same_frame(
        co.iloc[0].wrap(two_wide(), group_by=False),
        pd.DataFrame(two_wide(), index=df4.index, columns=df4.columns[:2])
    )
    same_series(
        co.iloc[0].wrap_reduced(np.array([1, 2]), group_by=False),
        pd.Series(np.array([1, 2]), index=df4.columns[:2])
    )

    # --- first group selected with a list of positions ---
    same_frame(
        co.iloc[[0]].wrap(flat()),
        pd.DataFrame(stacked(), index=df4.index, columns=labels('g1'))
    )
    same_series(
        co.iloc[[0]].wrap_reduced(np.array([1])),
        pd.Series(np.array([1]), index=labels('g1'))
    )
    same_frame(
        co.iloc[[0]].wrap(two_wide(), group_by=False),
        pd.DataFrame(two_wide(), index=df4.index, columns=df4.columns[:2])
    )
    same_series(
        co.iloc[[0]].wrap_reduced(np.array([1, 2]), group_by=False),
        pd.Series(np.array([1, 2]), index=df4.columns[:2])
    )

    # --- second group selected with a scalar position ---
    same_series(
        co.iloc[1].wrap(flat()),
        pd.Series(flat(), index=df4.index, name='g2')
    )
    assert co.iloc[1].wrap_reduced(np.array([1])) == 1
    same_series(
        co.iloc[1].wrap(flat(), group_by=False),
        pd.Series(flat(), index=df4.index, name=df4.columns[2])
    )
    assert co.iloc[1].wrap_reduced(np.array([1]), group_by=False) == 1

    # --- second group selected with a list of positions ---
    same_frame(
        co.iloc[[1]].wrap(flat()),
        pd.DataFrame(stacked(), index=df4.index, columns=labels('g2'))
    )
    same_series(
        co.iloc[[1]].wrap_reduced(np.array([1])),
        pd.Series(np.array([1]), index=labels('g2'))
    )
    same_frame(
        co.iloc[[1]].wrap(flat(), group_by=False),
        pd.DataFrame(stacked(), index=df4.index, columns=df4.columns[2:])
    )
    same_series(
        co.iloc[[1]].wrap_reduced(np.array([1]), group_by=False),
        pd.Series(np.array([1]), index=df4.columns[2:])
    )
926
927
def test_dummy(self):
    """Check that `dummy()` objects carry the wrapper's index and columns."""
    same_index = pd.testing.assert_index_equal

    # Plain series wrapper
    same_index(sr2_wrapper.dummy().index, sr2_wrapper.index)
    same_index(sr2_wrapper.dummy().to_frame().columns, sr2_wrapper.columns)

    # Plain frame wrapper
    same_index(df4_wrapper.dummy().index, df4_wrapper.index)
    same_index(df4_wrapper.dummy().columns, df4_wrapper.columns)

    # Grouped series wrapper: columns are compared against `get_columns()`
    same_index(sr2_grouped_wrapper.dummy().index, sr2_grouped_wrapper.index)
    same_index(
        sr2_grouped_wrapper.dummy().to_frame().columns,
        sr2_grouped_wrapper.get_columns()
    )

    # Grouped frame wrapper: columns are compared against `get_columns()`
    same_index(df4_grouped_wrapper.dummy().index, df4_grouped_wrapper.index)
    same_index(df4_grouped_wrapper.dummy().columns, df4_grouped_wrapper.get_columns())
960
961
962
# Wrapping fixtures built on the ungrouped wrappers
sr2_wrapping, df4_wrapping = (
    array_wrapper.Wrapping(sr2_wrapper),
    array_wrapper.Wrapping(df4_wrapper),
)

# Wrapping fixtures built on the grouped wrappers
sr2_grouped_wrapping, df4_grouped_wrapping = (
    array_wrapper.Wrapping(sr2_grouped_wrapper),
    array_wrapper.Wrapping(df4_grouped_wrapper),
)
967
968
969
class TestWrapping:
    """Tests for `array_wrapper.Wrapping`."""

    def test_regroup(self):
        """Check `regroup` with `None`, `False`, and an explicit grouping."""
        group_by = df4_grouped_wrapper.grouper.group_by

        # These calls compare equal to the instance they were called on
        assert df4_wrapping.regroup(None) == df4_wrapping
        assert df4_wrapping.regroup(False) == df4_wrapping
        assert df4_grouped_wrapping.regroup(None) == df4_grouped_wrapping
        assert df4_grouped_wrapping.regroup(group_by) == df4_grouped_wrapping

        # Grouping an ungrouped instance installs the given grouping
        regrouped = df4_wrapping.regroup(group_by)
        pd.testing.assert_index_equal(regrouped.wrapper.grouper.group_by, group_by)

        # `False` removes the grouping
        ungrouped = df4_grouped_wrapping.regroup(False)
        assert ungrouped.wrapper.grouper.group_by is None

    def test_select_one(self):
        """Check `select_one` on single-column and multi-column instances."""
        # Single-column instances compare equal to the result
        assert sr2_wrapping.select_one() == sr2_wrapping
        assert sr2_grouped_wrapping.select_one() == sr2_grouped_wrapping

        # A column (or group) label picks exactly that column
        picked_column = df4_wrapping.select_one(column='a6')
        pd.testing.assert_index_equal(
            picked_column.wrapper.get_columns(),
            pd.Index(['a6'], dtype='object', name='c6')
        )
        picked_group = df4_grouped_wrapping.select_one(column='g1')
        pd.testing.assert_index_equal(
            picked_group.wrapper.get_columns(),
            pd.Index(['g1'], dtype='object')
        )

        # Multi-column instances raise when no column is given
        with pytest.raises(Exception):
            df4_wrapping.select_one()
        with pytest.raises(Exception):
            df4_grouped_wrapping.select_one()
996
997
998
# ############# index_fns.py ############# #
999
1000
class TestIndexFns:
    """Tests for the index helpers in `index_fns`."""

    def test_get_index(self):
        """Check `get_index` along both axes of a series and a frame."""
        check = pd.testing.assert_index_equal

        check(index_fns.get_index(sr1, 0), sr1.index)
        # Along axis 1 a series yields a one-element index holding its name
        check(index_fns.get_index(sr1, 1), pd.Index([sr1.name]))
        # A series without a name (marked "empty" in the original test) yields label 0
        check(index_fns.get_index(pd.Series([1, 2, 3]), 1), pd.Index([0]))
        check(index_fns.get_index(df1, 0), df1.index)
        check(index_fns.get_index(df1, 1), df1.columns)

    def test_index_from_values(self):
        """Check the labels `index_from_values` derives from different kinds of values."""
        check = pd.testing.assert_index_equal

        # Scalars are used as labels directly
        check(
            index_fns.index_from_values([0.1, 0.2], name='a'),
            pd.Index([0.1, 0.2], dtype='float64', name='a')
        )

        # Arrays filled with one repeated value are labelled by that value
        constant_blocks = np.tile(np.arange(1, 4)[:, None][:, None], (1, 3, 3))
        check(
            index_fns.index_from_values(constant_blocks, name='b'),
            pd.Index([1, 2, 3], dtype='int64', name='b')
        )

        # Arrays of random values get positional "array_<i>" labels
        random_blocks = np.random.uniform(size=(3, 3, 3))
        check(
            index_fns.index_from_values(random_blocks, name='c'),
            pd.Index(['array_0', 'array_1', 'array_2'], dtype='object', name='c')
        )

        # Tuples get positional "tuple_<i>" labels
        check(
            index_fns.index_from_values([(1, 2), (3, 4), (5, 6)], name='c'),
            pd.Index(['tuple_0', 'tuple_1', 'tuple_2'], dtype='object', name='c')
        )

        # Other objects are labelled "<class name>_<i>"
        class A:
            pass

        class B:
            pass

        class C:
            pass

        instances = [A(), B(), C()]
        check(
            index_fns.index_from_values(instances, name='c'),
            pd.Index(['A_0', 'B_1', 'C_2'], dtype='object', name='c')
        )

    def test_repeat_index(self):
        """Check that `repeat_index` repeats every label consecutively."""
        check = pd.testing.assert_index_equal
        i = pd.Index([1, 2, 3], name='i')
        level_pairs = [('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8')]

        check(
            index_fns.repeat_index(i, 3),
            pd.Index([1, 1, 1, 2, 2, 2, 3, 3, 3], dtype='int64', name='i')
        )
        check(
            index_fns.repeat_index(multi_i, 3),
            pd.MultiIndex.from_tuples(
                [pair for pair in level_pairs for _ in range(3)],
                names=['i7', 'i8']
            )
        )
        # `[0]` (marked "empty" in the original test) comes back as 0..2, not repeated zeros
        check(
            index_fns.repeat_index([0], 3),
            pd.Index([0, 1, 2], dtype='int64')
        )
        # A simple range index comes back as a longer range index
        check(
            index_fns.repeat_index(sr_none.index, 3),
            pd.RangeIndex(start=0, stop=3, step=1)
        )

    def test_tile_index(self):
        """Check that `tile_index` repeats the whole index back to back."""
        check = pd.testing.assert_index_equal
        i = pd.Index([1, 2, 3], name='i')
        level_pairs = [('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8')]

        check(
            index_fns.tile_index(i, 3),
            pd.Index([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype='int64', name='i')
        )
        check(
            index_fns.tile_index(multi_i, 3),
            pd.MultiIndex.from_tuples(level_pairs * 3, names=['i7', 'i8'])
        )
        # `[0]` (marked "empty" in the original test) comes back as 0..2, not repeated zeros
        check(
            index_fns.tile_index([0], 3),
            pd.Index([0, 1, 2], dtype='int64')
        )
        # A simple range index comes back as a longer range index
        check(
            index_fns.tile_index(sr_none.index, 3),
            pd.RangeIndex(start=0, stop=3, step=1)
        )

    def test_stack_indexes(self):
        """Check `stack_indexes`, including `drop_duplicates` and `drop_redundant`."""
        check = pd.testing.assert_index_equal

        # Levels of all inputs are placed side by side
        check(
            index_fns.stack_indexes([sr2.index, df2.index, df5.index]),
            pd.MultiIndex.from_tuples(
                [
                    ('x2', 'x4', 'x7', 'x8'),
                    ('y2', 'y4', 'y7', 'y8'),
                    ('z2', 'z4', 'z7', 'z8'),
                ],
                names=['i2', 'i4', 'i7', 'i8']
            )
        )

        # The same index passed twice is kept twice ...
        check(
            index_fns.stack_indexes([sr2.index, df2.index, sr2.index], drop_duplicates=False),
            pd.MultiIndex.from_tuples(
                [('x2', 'x4', 'x2'), ('y2', 'y4', 'y2'), ('z2', 'z4', 'z2')],
                names=['i2', 'i4', 'i2']
            )
        )
        # ... unless duplicates are dropped
        check(
            index_fns.stack_indexes([sr2.index, df2.index, sr2.index], drop_duplicates=True),
            pd.MultiIndex.from_tuples(
                [('x4', 'x2'), ('y4', 'y2'), ('z4', 'z2')],
                names=['i4', 'i2']
            )
        )

        # An unnamed single-valued level is removed as redundant
        check(
            index_fns.stack_indexes([pd.Index([1, 1]), pd.Index([2, 3])], drop_redundant=True),
            pd.Index([2, 3])
        )

    def test_combine_indexes(self):
        """Check `combine_indexes` with the different `drop_redundant` settings."""
        check = pd.testing.assert_index_equal

        # One label combined with two labels
        check(
            index_fns.combine_indexes([pd.Index([1]), pd.Index([2, 3])], drop_redundant=False),
            pd.MultiIndex.from_tuples([(1, 2), (1, 3)])
        )
        check(
            index_fns.combine_indexes([pd.Index([1]), pd.Index([2, 3])], drop_redundant=True),
            pd.Index([2, 3], dtype='int64')
        )
        # A named single-valued level is kept even with `drop_redundant=True`
        check(
            index_fns.combine_indexes([pd.Index([1], name='i'), pd.Index([2, 3])], drop_redundant=True),
            pd.MultiIndex.from_tuples([(1, 2), (1, 3)], names=['i', None])
        )

        # Two labels combined with one label
        check(
            index_fns.combine_indexes([pd.Index([1, 2]), pd.Index([3])], drop_redundant=False),
            pd.MultiIndex.from_tuples([(1, 3), (2, 3)])
        )
        check(
            index_fns.combine_indexes([pd.Index([1, 2]), pd.Index([3])], drop_redundant=True),
            pd.Index([1, 2], dtype='int64')
        )

        # `drop_redundant` given as a tuple
        check(
            index_fns.combine_indexes([pd.Index([1]), pd.Index([2, 3])], drop_redundant=(False, True)),
            pd.Index([2, 3], dtype='int64')
        )

        # Every label of the first index is paired with every row of the second
        inner_pairs = [('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8')]
        check(
            index_fns.combine_indexes([df2.index, df5.index]),
            pd.MultiIndex.from_tuples(
                [(outer,) + pair for outer in ('x4', 'y4', 'z4') for pair in inner_pairs],
                names=['i4', 'i7', 'i8']
            )
        )

    def test_drop_levels(self):
        """Check `drop_levels` in strict and non-strict mode."""
        check = pd.testing.assert_index_equal

        check(
            index_fns.drop_levels(multi_i, 'i7'),
            pd.Index(['x8', 'y8', 'z8'], dtype='object', name='i8')
        )
        check(
            index_fns.drop_levels(multi_i, 'i8'),
            pd.Index(['x7', 'y7', 'z7'], dtype='object', name='i7')
        )

        # Unknown level: returned unchanged when not strict, error otherwise
        check(index_fns.drop_levels(multi_i, 'i9', strict=False), multi_i)
        with pytest.raises(Exception):
            _ = index_fns.drop_levels(multi_i, 'i9')

        # Dropping every level: won't do anything when not strict, error otherwise
        check(
            index_fns.drop_levels(multi_i, ['i7', 'i8'], strict=False),
            pd.MultiIndex.from_tuples(
                [('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8')],
                names=['i7', 'i8']
            )
        )
        with pytest.raises(Exception):
            _ = index_fns.drop_levels(multi_i, ['i7', 'i8'])

    def test_rename_levels(self):
        """Check `rename_levels` on a flat index and on a multi-index."""
        check = pd.testing.assert_index_equal
        i = pd.Index([1, 2, 3], name='i')

        check(
            index_fns.rename_levels(i, {'i': 'f'}),
            pd.Index([1, 2, 3], dtype='int64', name='f')
        )

        # Unknown name: returned unchanged when not strict, error when strict
        check(index_fns.rename_levels(i, {'a': 'b'}, strict=False), i)
        with pytest.raises(Exception):
            _ = index_fns.rename_levels(i, {'a': 'b'}, strict=True)

        check(
            index_fns.rename_levels(multi_i, {'i7': 'f7', 'i8': 'f8'}),
            pd.MultiIndex.from_tuples(
                [('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8')],
                names=['f7', 'f8']
            )
        )

    def test_select_levels(self):
        """Check `select_levels` with a single name and with lists of names."""
        check = pd.testing.assert_index_equal

        # A single name gives a flat index
        check(
            index_fns.select_levels(multi_i, 'i7'),
            pd.Index(['x7', 'y7', 'z7'], dtype='object', name='i7')
        )
        # A one-element list gives a one-level multi-index
        check(
            index_fns.select_levels(multi_i, ['i7']),
            pd.MultiIndex.from_tuples([('x7',), ('y7',), ('z7',)], names=['i7'])
        )
        check(
            index_fns.select_levels(multi_i, ['i7', 'i8']),
            pd.MultiIndex.from_tuples(
                [('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8')],
                names=['i7', 'i8']
            )
        )

    def test_drop_redundant_levels(self):
        """Check which levels `drop_redundant_levels` removes and which it keeps."""
        check = pd.testing.assert_index_equal
        drop = index_fns.drop_redundant_levels

        # if one unnamed, leaves as-is
        check(
            drop(pd.Index(['a', 'a'])),
            pd.Index(['a', 'a'], dtype='object')
        )
        # if all unnamed, leaves as-is
        check(
            drop(pd.MultiIndex.from_arrays([['a', 'a'], ['b', 'b']])),
            pd.MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        )
        # removes level with single unnamed value
        check(
            drop(pd.MultiIndex.from_arrays([['a', 'a'], ['b', 'b']], names=['hi', None])),
            pd.Index(['a', 'a'], dtype='object', name='hi')
        )
        # legit
        check(
            drop(pd.MultiIndex.from_arrays([['a', 'b'], ['a', 'b']], names=['hi', 'hi2'])),
            pd.MultiIndex.from_tuples([('a', 'a'), ('b', 'b')], names=['hi', 'hi2'])
        )
        # ignores 0-to-n
        check(
            drop(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=[None, 'hi2'])),
            pd.Index(['a', 'b'], dtype='object', name='hi2')
        )
        # legit
        check(
            drop(pd.MultiIndex.from_arrays([[0, 2], ['a', 'b']], names=[None, 'hi2'])),
            pd.MultiIndex.from_tuples([(0, 'a'), (2, 'b')], names=[None, 'hi2'])
        )
        # legit (w/ name)
        check(
            drop(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=['hi', 'hi2'])),
            pd.MultiIndex.from_tuples([(0, 'a'), (1, 'b')], names=['hi', 'hi2'])
        )

    def test_drop_duplicate_levels(self):
        """Check `drop_duplicate_levels` and its `keep` option."""
        check = pd.testing.assert_index_equal

        # Two identical levels collapse into a flat index
        twice_a = pd.MultiIndex.from_arrays([[1, 2, 3], [1, 2, 3]], names=['a', 'a'])
        check(
            index_fns.drop_duplicate_levels(twice_a),
            pd.Index([1, 2, 3], dtype='int64', name='a')
        )

        # Level 'y' occurs twice: `keep` decides which occurrence survives
        check(
            index_fns.drop_duplicate_levels(
                pd.MultiIndex.from_tuples(
                    [(0, 1, 2, 1), ('a', 'b', 'c', 'b')],
                    names=['x', 'y', 'z', 'y']
                ),
                keep='last'
            ),
            pd.MultiIndex.from_tuples(
                [(0, 2, 1), ('a', 'c', 'b')],
                names=['x', 'z', 'y']
            )
        )
        check(
            index_fns.drop_duplicate_levels(
                pd.MultiIndex.from_tuples(
                    [(0, 1, 2, 1), ('a', 'b', 'c', 'b')],
                    names=['x', 'y', 'z', 'y']
                ),
                keep='first'
            ),
            pd.MultiIndex.from_tuples(
                [(0, 1, 2), ('a', 'b', 'c')],
                names=['x', 'y', 'z']
            )
        )

    def test_align_index_to(self):
        """Check the positions `align_index_to` returns and the inputs it rejects."""
        same_array = np.testing.assert_array_equal
        index1 = pd.Index(['c', 'b', 'a'], name='name1')

        # Aligning an index to itself gives a full slice
        assert index_fns.align_index_to(index1, index1) == pd.IndexSlice[:]

        # Flat target containing the labels twice
        index2 = pd.Index(['a', 'b', 'c'] * 2, name='name1')
        same_array(
            index_fns.align_index_to(index1, index2),
            np.array([2, 1, 0, 2, 1, 0])
        )
        with pytest.raises(Exception):
            index_fns.align_index_to(pd.Index(['a']), pd.Index(['a', 'b', 'c']))

        # Multi-index target whose 'name1' level follows the order of index1
        index3 = pd.MultiIndex.from_tuples(
            [(number, letter) for number in (0, 1) for letter in 'cba'],
            names=['name2', 'name1']
        )
        same_array(
            index_fns.align_index_to(index1, index3),
            np.array([0, 1, 2, 0, 1, 2])
        )
        # A source with a missing label or a repeated label is rejected
        with pytest.raises(Exception):
            index_fns.align_index_to(
                pd.Index(['b', 'a'], name='name1'),
                index3
            )
        with pytest.raises(Exception):
            index_fns.align_index_to(
                pd.Index(['c', 'b', 'a', 'a'], name='name1'),
                index3
            )

        # Multi-index target whose 'name1' level is in reverse order
        index4 = pd.MultiIndex.from_tuples(
            [(number, letter) for number in (0, 1) for letter in 'abc'],
            names=['name2', 'name1']
        )
        same_array(
            index_fns.align_index_to(index1, index4),
            np.array([2, 1, 0, 2, 1, 0])
        )

    def test_align_indexes(self):
        """Check `align_indexes` on indexes with one, two and three levels."""
        letters = 'abc'
        index1 = pd.Index(list(letters))
        index2 = pd.MultiIndex.from_tuples(
            [(inner, letter) for inner in (0, 1) for letter in letters]
        )
        index3 = pd.MultiIndex.from_tuples(
            [(outer, inner, letter) for outer in (2, 3) for inner in (0, 1) for letter in letters]
        )

        indices1, indices2, indices3 = index_fns.align_indexes([index1, index2, index3])

        # The shorter indexes are repeated to the length of the longest one
        np.testing.assert_array_equal(indices1, np.array([0, 1, 2] * 4))
        np.testing.assert_array_equal(indices2, np.array([0, 1, 2, 3, 4, 5] * 2))
        # The longest index is taken as is
        assert indices3 == pd.IndexSlice[:]

    def test_pick_levels(self):
        """Check the level positions `pick_levels` assigns to required and optional levels."""
        index = index_fns.stack_indexes([multi_i, multi_c])

        def pick(required, optional):
            # Shorthand for calling `pick_levels` on the stacked four-level index
            return index_fns.pick_levels(index, required_levels=required, optional_levels=optional)

        # Required levels only
        assert pick([], []) == ([], [])
        assert pick(['c8', 'c7', 'i8', 'i7'], []) == ([3, 2, 1, 0], [])
        assert pick(['c8', None, 'i8', 'i7'], []) == ([3, 2, 1, 0], [])
        assert pick([None, 'c7', 'i8', 'i7'], []) == ([3, 2, 1, 0], [])
        assert pick([None, None, None, None], []) == ([0, 1, 2, 3], [])

        # Required and optional levels together
        assert pick(['c8', 'c7', 'i8'], ['i7']) == ([3, 2, 1], [0])
        assert pick(['c8', None, 'i8'], ['i7']) == ([3, 2, 1], [0])
        assert pick([None, 'c7', 'i8'], ['i7']) == ([3, 2, 1], [0])
        assert pick([None, None, None, None], [None]) == ([0, 1, 2, 3], [None])

        # The same level requested more than once is rejected
        with pytest.raises(Exception):
            index_fns.pick_levels(index, required_levels=['i8', 'i8', 'i8', 'i8'], optional_levels=[])
        with pytest.raises(Exception):
            index_fns.pick_levels(index, required_levels=['c8', 'c7', 'i8', 'i7'], optional_levels=['i7'])
1415
1416
1417
# ############# reshape_fns.py ############# #
1418
1419
1420
class TestReshapeFns:
1421
def test_soft_to_ndim(self):
    """Check `soft_to_ndim` towards one and two dimensions."""
    same_array = np.testing.assert_array_equal
    same_series = pd.testing.assert_series_equal
    same_frame = pd.testing.assert_frame_equal

    # Target: one dimension
    same_array(reshape_fns.soft_to_ndim(a2, 1), a2)
    same_series(reshape_fns.soft_to_ndim(sr2, 1), sr2)
    same_series(reshape_fns.soft_to_ndim(df2, 1), df2.iloc[:, 0])
    # A frame with several columns cannot be reduced, so it is left untouched
    same_frame(reshape_fns.soft_to_ndim(df4, 1), df4)

    # Target: two dimensions
    same_array(reshape_fns.soft_to_ndim(a2, 2), a2[:, None])
    same_frame(reshape_fns.soft_to_ndim(sr2, 2), sr2.to_frame())
    same_frame(reshape_fns.soft_to_ndim(df2, 2), df2)
1429
1430
def test_to_1d(self):
    """Check `to_1d` on scalars, arrays and pandas objects."""
    same_array = np.testing.assert_array_equal
    same_series = pd.testing.assert_series_equal

    # Scalars become one-element arrays
    same_array(reshape_fns.to_1d(None), np.asarray([None]))
    same_array(reshape_fns.to_1d(0), np.asarray([0]))
    # One-dimensional inputs pass through
    same_array(reshape_fns.to_1d(a2), a2)
    same_series(reshape_fns.to_1d(sr2), sr2)
    # A single-column frame becomes its only column
    same_series(reshape_fns.to_1d(df2), df2.iloc[:, 0])
    # `raw=True` yields the underlying values
    same_array(reshape_fns.to_1d(df2, raw=True), df2.iloc[:, 0].values)
1437
1438
def test_to_2d(self):
    """Check `to_2d` on scalars, arrays and pandas objects."""
    same_array = np.testing.assert_array_equal
    same_frame = pd.testing.assert_frame_equal

    # Scalars become 1x1 arrays
    same_array(reshape_fns.to_2d(None), np.asarray([[None]]))
    same_array(reshape_fns.to_2d(0), np.asarray([[0]]))
    # One-dimensional inputs become a single column
    same_array(reshape_fns.to_2d(a2), a2[:, None])
    same_frame(reshape_fns.to_2d(sr2), sr2.to_frame())
    # Frames pass through
    same_frame(reshape_fns.to_2d(df2), df2)
    # `raw=True` yields the underlying values
    same_array(reshape_fns.to_2d(df2, raw=True), df2.values)
1445
1446
def test_repeat_axis0(self):
    """Check `repeat` along axis 0 for a scalar, an array, a series and a frame."""
    n = 3
    target = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])

    # Scalar and raw array
    np.testing.assert_array_equal(reshape_fns.repeat(0, n, axis=0), np.full(3, 0))
    np.testing.assert_array_equal(reshape_fns.repeat(a2, n, axis=0), target)

    # Pandas objects: the index is repeated along with the values
    expected_sr = pd.Series(target, index=index_fns.repeat_index(sr2.index, n), name=sr2.name)
    pd.testing.assert_series_equal(reshape_fns.repeat(sr2, n, axis=0), expected_sr)

    expected_df = pd.DataFrame(target, index=index_fns.repeat_index(df2.index, n), columns=df2.columns)
    pd.testing.assert_frame_equal(reshape_fns.repeat(df2, n, axis=0), expected_df)
1458
1459
def test_repeat_axis1(self):
    """Check `repeat` along axis 1 for a scalar, an array, a series and a frame."""
    n = 3
    target = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])

    # Scalar and raw array
    np.testing.assert_array_equal(reshape_fns.repeat(0, n, axis=1), np.full((1, 3), 0))
    np.testing.assert_array_equal(reshape_fns.repeat(a2, n, axis=1), target)

    # Series: the name is repeated to form the columns
    expected_from_sr = pd.DataFrame(target, index=sr2.index, columns=index_fns.repeat_index([sr2.name], n))
    pd.testing.assert_frame_equal(reshape_fns.repeat(sr2, n, axis=1), expected_from_sr)

    # Frame: the columns are repeated
    expected_from_df = pd.DataFrame(target, index=df2.index, columns=index_fns.repeat_index(df2.columns, n))
    pd.testing.assert_frame_equal(reshape_fns.repeat(df2, n, axis=1), expected_from_df)
1471
1472
def test_tile_axis0(self):
    """Check `tile` along axis 0 for a scalar, an array, a series and a frame."""
    n = 3
    target = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])

    # Scalar and raw array
    np.testing.assert_array_equal(reshape_fns.tile(0, n, axis=0), np.full(3, 0))
    np.testing.assert_array_equal(reshape_fns.tile(a2, n, axis=0), target)

    # Pandas objects: the index is tiled along with the values
    expected_sr = pd.Series(target, index=index_fns.tile_index(sr2.index, n), name=sr2.name)
    pd.testing.assert_series_equal(reshape_fns.tile(sr2, n, axis=0), expected_sr)

    expected_df = pd.DataFrame(target, index=index_fns.tile_index(df2.index, n), columns=df2.columns)
    pd.testing.assert_frame_equal(reshape_fns.tile(df2, n, axis=0), expected_df)
1484
1485
def test_tile_axis1(self):
    """Check `tile` along axis 1 for a scalar, an array, a series and a frame."""
    n = 3
    target = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])

    # Scalar and raw array
    np.testing.assert_array_equal(reshape_fns.tile(0, n, axis=1), np.full((1, 3), 0))
    np.testing.assert_array_equal(reshape_fns.tile(a2, n, axis=1), target)

    # Series: the name is tiled to form the columns
    expected_from_sr = pd.DataFrame(target, index=sr2.index, columns=index_fns.tile_index([sr2.name], n))
    pd.testing.assert_frame_equal(reshape_fns.tile(sr2, n, axis=1), expected_from_sr)

    # Frame: the columns are tiled
    expected_from_df = pd.DataFrame(target, index=df2.index, columns=index_fns.tile_index(df2.columns, n))
    pd.testing.assert_frame_equal(reshape_fns.tile(df2, n, axis=1), expected_from_df)
1497
1498
def test_broadcast_numpy(self):
    """Check that `reshape_fns.broadcast` agrees with `np.broadcast_arrays` on NumPy inputs.

    Two input sets are covered: one that broadcasts to one dimension and one
    that broadcasts to two dimensions.
    """
    input_sets = (
        (0, a1, a2),  # 1d
        (0, a1, a2, a3, a4, a5),  # 2d
    )
    for to_broadcast in input_sets:
        broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast))
        broadcasted = reshape_fns.broadcast(*to_broadcast)
        # Compare lengths explicitly: iterating over the result alone would
        # let a result with too few items pass unnoticed.
        assert len(broadcasted) == len(broadcasted_arrs)
        for actual, expected in zip(broadcasted, broadcasted_arrs):
            np.testing.assert_array_equal(actual, expected)
1517
1518
def test_broadcast_stack(self):
    """Check `reshape_fns.broadcast` when index and columns come from stacking.

    Values are compared against `np.broadcast_arrays`; the expected index and
    columns are the stacked levels of all named inputs.
    """
    stack_kwargs = dict(
        index_from='stack',
        columns_from='stack',
        drop_duplicates=True,
        drop_redundant=True,
        ignore_sr_names=True
    )

    # 1d
    to_broadcast = 0, a1, a2, sr_none, sr1, sr2
    broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast))
    broadcasted = reshape_fns.broadcast(*to_broadcast, **stack_kwargs)
    expected_index = pd.MultiIndex.from_tuples([
        ('x1', 'x2'),
        ('x1', 'y2'),
        ('x1', 'z2')
    ], names=['i1', 'i2'])
    # Compare lengths explicitly: iterating over the result alone would
    # let a result with too few items pass unnoticed.
    assert len(broadcasted) == len(broadcasted_arrs)
    for actual, expected_values in zip(broadcasted, broadcasted_arrs):
        pd.testing.assert_series_equal(
            actual,
            pd.Series(expected_values, index=expected_index, name=None)
        )

    # 2d
    to_broadcast_a = 0, a1, a2, a3, a4, a5
    to_broadcast_sr = sr_none, sr1, sr2
    to_broadcast_df = df_none, df1, df2, df3, df4
    broadcasted_arrs = list(np.broadcast_arrays(
        *to_broadcast_a,
        *[x.to_frame() for x in to_broadcast_sr],  # here is the difference
        *to_broadcast_df
    ))
    broadcasted = reshape_fns.broadcast(
        *to_broadcast_a, *to_broadcast_sr, *to_broadcast_df,
        **stack_kwargs
    )
    expected_index = pd.MultiIndex.from_tuples([
        ('x1', 'x2', 'x3', 'x4', 'x5', 'x6'),
        ('x1', 'y2', 'x3', 'y4', 'x5', 'y6'),
        ('x1', 'z2', 'x3', 'z4', 'x5', 'z6')
    ], names=['i1', 'i2', 'i3', 'i4', 'i5', 'i6'])
    expected_columns = pd.MultiIndex.from_tuples([
        ('a3', 'a4', 'a5', 'a6'),
        ('a3', 'a4', 'b5', 'b6'),
        ('a3', 'a4', 'c5', 'c6')
    ], names=['c3', 'c4', 'c5', 'c6'])
    assert len(broadcasted) == len(broadcasted_arrs)
    for actual, expected_values in zip(broadcasted, broadcasted_arrs):
        pd.testing.assert_frame_equal(
            actual,
            pd.DataFrame(expected_values, index=expected_index, columns=expected_columns)
        )

    # Two frames with equal column labels under different level names:
    # both levels are kept in the stacked columns.
    broadcasted = reshape_fns.broadcast(
        pd.DataFrame([[1, 2, 3]], columns=pd.Index(['a', 'b', 'c'], name='i1')),
        pd.DataFrame([[4, 5, 6]], columns=pd.Index(['a', 'b', 'c'], name='i2')),
        **stack_kwargs
    )
    stacked_columns = pd.MultiIndex.from_tuples([
        ('a', 'a'), ('b', 'b'), ('c', 'c')
    ], names=['i1', 'i2'])
    pd.testing.assert_frame_equal(
        broadcasted[0],
        pd.DataFrame([[1, 2, 3]], columns=stacked_columns)
    )
    pd.testing.assert_frame_equal(
        broadcasted[1],
        pd.DataFrame([[4, 5, 6]], columns=stacked_columns)
    )
1599
1600
def test_broadcast_keep(self):
    """Check `broadcast` with index_from='keep' and columns_from='keep'.

    The expectations below keep every input's own labels (repeated to
    length 3 where needed); inputs without labels are expected to come
    back with a default RangeIndex.
    """
    flags = dict(
        index_from='keep',
        columns_from='keep',
        drop_duplicates=True,
        drop_redundant=True,
        ignore_sr_names=True
    )
    range3 = pd.RangeIndex(start=0, stop=3, step=1)

    # 1d
    inputs = 0, a1, a2, sr_none, sr1, sr2
    raw = list(np.broadcast_arrays(*inputs))
    result = reshape_fns.broadcast(*inputs, **flags)
    # One entry per output position: keyword arguments of the expected Series
    expected_sr_kwargs = [dict(index=range3)] * 4 + [
        dict(index=pd.Index(['x1', 'x1', 'x1'], name='i1'), name=sr1.name),
        dict(index=sr2.index, name=sr2.name)
    ]
    for pos, sr_kwargs in enumerate(expected_sr_kwargs):
        pd.testing.assert_series_equal(
            result[pos],
            pd.Series(raw[pos], **sr_kwargs)
        )

    # 2d
    arrays = 0, a1, a2, a3, a4, a5
    series = sr_none, sr1, sr2
    frames = df_none, df1, df2, df3, df4
    raw = list(np.broadcast_arrays(
        *arrays,
        *[sr.to_frame() for sr in series],  # here is the difference
        *frames
    ))
    result = reshape_fns.broadcast(*arrays, *series, *frames, **flags)

    def repeated(label, name=None):
        # Object-dtype index holding the same label three times
        return pd.Index([label] * 3, dtype='object', name=name)

    # One (index, columns) pair per output position
    expected_labels = [(range3, range3)] * 7 + [
        (repeated('x1', 'i1'), repeated('a1')),
        (sr2.index, repeated('a2')),
        (range3, range3),
        (repeated('x3', 'i3'), repeated('a3', 'c3')),
        (df2.index, repeated('a4', 'c4')),
        (repeated('x5', 'i5'), df3.columns),
        (df4.index, df4.columns)
    ]
    for pos, (index, columns) in enumerate(expected_labels):
        pd.testing.assert_frame_equal(
            result[pos],
            pd.DataFrame(raw[pos], index=index, columns=columns)
        )
1707
1708
def test_broadcast_specify(self):
    """Check `broadcast` when the target index/columns are passed explicitly."""
    flags = dict(drop_duplicates=True, drop_redundant=True, ignore_sr_names=True)

    # 1d
    inputs = 0, a1, a2, sr_none, sr1, sr2
    raw = list(np.broadcast_arrays(*inputs))
    name_cases = (
        (['name'], 'name'),  # should translate to Series name
        ([0], None)  # should translate to None
    )
    for columns_from, expected_name in name_cases:
        result = reshape_fns.broadcast(
            *inputs,
            index_from=multi_i,
            columns_from=columns_from,
            **flags
        )
        for pos, obj in enumerate(result):
            pd.testing.assert_series_equal(
                obj,
                pd.Series(raw[pos], index=multi_i, name=expected_name)
            )

    # 2d
    arrays = 0, a1, a2, a3, a4, a5
    series = sr_none, sr1, sr2
    frames = df_none, df1, df2, df3, df4
    raw = list(np.broadcast_arrays(
        *arrays,
        *[sr.to_frame() for sr in series],  # here is the difference
        *frames
    ))
    result = reshape_fns.broadcast(
        *arrays, *series, *frames,
        index_from=multi_i,
        columns_from=multi_c,
        **flags
    )
    for pos, obj in enumerate(result):
        pd.testing.assert_frame_equal(
            obj,
            pd.DataFrame(raw[pos], index=multi_i, columns=multi_c)
        )
1772
1773
def test_broadcast_idx(self):
    """Check `broadcast` when index_from/columns_from are argument positions.

    With -1 the outputs are expected to carry the labels of the last
    argument; position 0 is expected to raise for these inputs.
    """
    flags = dict(drop_duplicates=True, drop_redundant=True, ignore_sr_names=True)

    # 1d
    inputs = 0, a1, a2, sr_none, sr1, sr2
    raw = list(np.broadcast_arrays(*inputs))
    result = reshape_fns.broadcast(
        *inputs,
        index_from=-1,
        columns_from=-1,  # should translate to Series name
        **flags
    )
    for pos, obj in enumerate(result):
        pd.testing.assert_series_equal(
            obj,
            pd.Series(raw[pos], index=sr2.index, name=sr2.name)
        )
    with pytest.raises(Exception):
        _ = reshape_fns.broadcast(*inputs, index_from=0, columns_from=0, **flags)

    # 2d
    arrays = 0, a1, a2, a3, a4, a5
    series = sr_none, sr1, sr2
    frames = df_none, df1, df2, df3, df4
    raw = list(np.broadcast_arrays(
        *arrays,
        *[sr.to_frame() for sr in series],  # here is the difference
        *frames
    ))
    result = reshape_fns.broadcast(
        *arrays, *series, *frames,
        index_from=-1,
        columns_from=-1,
        **flags
    )
    for pos, obj in enumerate(result):
        pd.testing.assert_frame_equal(
            obj,
            pd.DataFrame(raw[pos], index=df4.index, columns=df4.columns)
        )
1829
1830
def test_broadcast_strict(self):
    """Check that 'strict' mode raises when the labels would have to change."""
    flags = dict(drop_duplicates=True, drop_redundant=True, ignore_sr_names=True)
    cases = (
        # 1d: changing index not allowed
        ((sr1, sr2), 'strict', 'stack'),
        # 2d: changing columns not allowed
        ((df1, df2), 'stack', 'strict')
    )
    for inputs, index_from, columns_from in cases:
        with pytest.raises(Exception):
            _ = reshape_fns.broadcast(
                *inputs,
                index_from=index_from,
                columns_from=columns_from,
                **flags
            )
1853
1854
def test_broadcast_dirty(self):
    """Check stacking with duplicate/redundant levels and Series names kept."""
    # 1d
    inputs = sr2, 0, a1, a2, sr_none, sr1, sr2
    raw = list(np.broadcast_arrays(*inputs))
    result = reshape_fns.broadcast(
        *inputs,
        index_from='stack',
        columns_from='stack',
        drop_duplicates=False,
        drop_redundant=False,
        ignore_sr_names=False
    )
    # sr2 appears twice among the inputs, hence the repeated 'i2' level
    stacked_index = pd.MultiIndex.from_tuples([
        ('x2', 'x1', 'x2'),
        ('y2', 'x1', 'y2'),
        ('z2', 'x1', 'z2')
    ], names=['i2', 'i1', 'i2'])
    stacked_name = ('a2', 'a1', 'a2')
    for pos, obj in enumerate(result):
        pd.testing.assert_series_equal(
            obj,
            pd.Series(raw[pos], index=stacked_index, name=stacked_name)
        )
1879
1880
def test_broadcast_to_shape(self):
    """Check `broadcast` of 1d inputs into an explicit (3, 3) target shape."""
    inputs = 0, a1, a2, sr_none, sr1, sr2
    target_shape = (3, 3)
    raw = []
    for obj in inputs:
        if isinstance(obj, pd.Series):
            # Series are compared as single-column frames
            obj = obj.to_frame()
        raw.append(np.broadcast_to(obj, target_shape))
    result = reshape_fns.broadcast(
        *inputs,
        to_shape=target_shape,
        index_from='stack',
        columns_from='stack',
        drop_duplicates=True,
        drop_redundant=True,
        ignore_sr_names=True
    )
    stacked_index = pd.MultiIndex.from_tuples([
        ('x1', 'x2'),
        ('x1', 'y2'),
        ('x1', 'z2')
    ], names=['i1', 'i2'])
    for pos, obj in enumerate(result):
        pd.testing.assert_frame_equal(
            obj,
            pd.DataFrame(raw[pos], index=stacked_index, columns=None)
        )
1908
1909
@pytest.mark.parametrize(
    "test_to_pd",
    [False, [False] * 6],
)
def test_broadcast_to_pd(self, test_to_pd):
    """Compare `broadcast` output with `np.broadcast_arrays` when `to_pd` is off.

    `to_pd` is passed once as a single flag and once as a per-argument list.
    """
    inputs = 0, a1, a2, sr_none, sr1, sr2
    raw = list(np.broadcast_arrays(*inputs))
    result = reshape_fns.broadcast(
        *inputs,
        to_pd=test_to_pd,  # to NumPy
        index_from='stack',
        columns_from='stack',
        drop_duplicates=True,
        drop_redundant=True,
        ignore_sr_names=True
    )
    for pos, obj in enumerate(result):
        np.testing.assert_array_equal(obj, raw[pos])
1930
1931
def test_broadcast_require_kwargs(self):
    """Check how `require_kwargs` affects the flags of the broadcast arrays."""

    def run(**kwargs):
        # Broadcast two fresh one-element arrays with the given options
        return reshape_fns.broadcast(np.empty((1,)), np.empty((1,)), **kwargs)

    # Without requirements both outputs are readonly
    first, second = run()
    assert not first.flags.writeable
    assert not second.flags.writeable

    # 'W' requested for the first argument only
    first, second = run(require_kwargs=[{'requirements': 'W'}, {}])
    assert first.flags.writeable
    assert not second.flags.writeable

    # Writeable and C order requested for the first argument only
    first, second = run(require_kwargs=[{'requirements': ('W', 'C')}, {}])
    assert first.flags.writeable  # writeable since it was copied to make C order
    assert not second.flags.writeable
    assert not np.isfortran(first)
    assert not np.isfortran(second)
1947
1948
def test_broadcast_meta(self):
    """Check the `keep_raw` and `return_meta` options of `broadcast`."""
    inputs = (0, a2, sr2, df2)
    expected_shape = (3, 3)
    expected_index = pd.MultiIndex.from_tuples([
        ('x2', 'x4'),
        ('y2', 'y4'),
        ('z2', 'z4')
    ], names=['i2', 'i4'])
    expected_columns = pd.Index(['a4', 'a4', 'a4'], name='c4', dtype='object')

    def check_raw(raw_a2, raw_sr2, raw_df2):
        # Raw outputs are compared with the inputs' own values (Series as a column)
        np.testing.assert_array_equal(raw_a2, a2)
        np.testing.assert_array_equal(raw_sr2, sr2.values[:, None])
        np.testing.assert_array_equal(raw_df2, df2.values)

    # keep_raw for every argument
    out_0, out_a2, out_sr2, out_df2 = reshape_fns.broadcast(*inputs, keep_raw=True)
    assert out_0 == 0
    check_raw(out_a2, out_sr2, out_df2)

    # keep_raw for all but the first argument
    out_0, out_a2, out_sr2, out_df2 = reshape_fns.broadcast(
        *inputs, keep_raw=[False, True, True, True])
    pd.testing.assert_frame_equal(
        out_0,
        pd.DataFrame(
            np.zeros(expected_shape, dtype=int),
            index=expected_index,
            columns=expected_columns
        )
    )
    check_raw(out_a2, out_sr2, out_df2)

    # return_meta appends the target shape, index and columns
    _, new_shape, new_index, new_columns = reshape_fns.broadcast(*inputs, return_meta=True)
    assert new_shape == expected_shape
    pd.testing.assert_index_equal(new_index, expected_index)
    pd.testing.assert_index_equal(new_columns, expected_columns)
1977
1978
def test_broadcast_align(self):
    """Check `broadcast` with align_index/align_columns on nested MultiIndexes.

    The three inputs are labelled by 1-, 2- and 3-level indexes that share
    their innermost level; the smaller inputs are expected to be repeated
    to fit the largest one.
    """
    letters = ['a', 'b', 'c']
    index1 = pd.Index(letters)
    index2 = pd.MultiIndex.from_tuples([
        (mid, letter) for mid in (0, 1) for letter in letters
    ])
    index3 = pd.MultiIndex.from_tuples([
        (top, mid, letter) for top in (2, 3) for mid in (0, 1) for letter in letters
    ])

    def square_frame(labels):
        # Frame labelled by `labels` on both axes, filled with 0..n*n-1 row by row
        size = len(labels)
        return pd.DataFrame(
            np.arange(size * size).reshape((size, size)),
            index=labels, columns=labels
        )

    small_sr = pd.Series(np.arange(len(index1)), index=index1)
    mid_df = square_frame(index2)
    large_df = square_frame(index3)
    out_sr, out_mid, out_large = reshape_fns.broadcast(
        small_sr, mid_df, large_df, align_index=True, align_columns=True)

    # Rows cycle through 0, 1, 2 and every row is constant across the 12 columns
    pd.testing.assert_frame_equal(
        out_sr,
        pd.DataFrame(
            np.tile(np.arange(3)[:, None], (4, 12)),
            index=index3, columns=index3
        )
    )
    # The 6x6 block 0..35 is repeated twice along both axes
    pd.testing.assert_frame_equal(
        out_mid,
        pd.DataFrame(
            np.tile(np.arange(36).reshape((6, 6)), (2, 2)),
            index=index3, columns=index3
        )
    )
    pd.testing.assert_frame_equal(out_large, large_df)
2047
2048
def test_broadcast_to(self):
    """Check `broadcast_to` against `np.broadcast_to` for array, Series and frame targets."""
    # Scalar to NumPy array
    np.testing.assert_array_equal(
        reshape_fns.broadcast_to(0, a5),
        np.broadcast_to(0, a5.shape)
    )

    # Scalar to Series: labels and name come from the target
    expected_sr = pd.Series(np.broadcast_to(0, sr2.shape), index=sr2.index, name=sr2.name)
    pd.testing.assert_series_equal(reshape_fns.broadcast_to(0, sr2), expected_sr)

    # Scalar to frame: labels come from the target
    expected_df = pd.DataFrame(
        np.broadcast_to(0, df5.shape), index=df5.index, columns=df5.columns)
    pd.testing.assert_frame_equal(reshape_fns.broadcast_to(0, df5), expected_df)

    # Series to frame: labels come from the target unless specified otherwise
    sr2_values = np.broadcast_to(sr2.to_frame(), df5.shape)
    pd.testing.assert_frame_equal(
        reshape_fns.broadcast_to(sr2, df5),
        pd.DataFrame(sr2_values, index=df5.index, columns=df5.columns)
    )
    pd.testing.assert_frame_equal(
        reshape_fns.broadcast_to(sr2, df5, index_from=0, columns_from=0),
        pd.DataFrame(
            sr2_values,
            index=sr2.index,
            columns=pd.Index(['a2', 'a2', 'a2'], dtype='object')
        )
    )
2069
2070
@pytest.mark.parametrize(
    "test_input",
    [0, a2, a5, sr2, df5, np.zeros((2, 2, 2))],
)
def test_broadcast_to_array_of(self, test_input):
    """Check `broadcast_to_array_of` for scalar, list and ready-made inputs."""
    # broadcasting first element to be an array out of the second argument
    slot_shape = (1, *np.asarray(test_input).shape)
    np.testing.assert_array_equal(
        reshape_fns.broadcast_to_array_of(0.1, test_input),
        np.full(slot_shape, 0.1)
    )
    np.testing.assert_array_equal(
        reshape_fns.broadcast_to_array_of([0.1], test_input),
        np.full(slot_shape, 0.1)
    )
    # Two values produce two stacked slots
    two_slots = np.concatenate((
        np.full(slot_shape, 0.1),
        np.full(slot_shape, 0.2)
    ))
    np.testing.assert_array_equal(
        reshape_fns.broadcast_to_array_of([0.1, 0.2], test_input),
        two_slots
    )
    # An argument that already has the leading axis: do nothing
    already_shaped = np.expand_dims(np.asarray(test_input), 0)
    np.testing.assert_array_equal(
        reshape_fns.broadcast_to_array_of(already_shaped, test_input),
        np.expand_dims(np.asarray(test_input), 0)
    )
2095
2096
def test_broadcast_to_axis_of(self):
    """Check `broadcast_to_axis_of` for existing and non-existing axes."""
    # (shape of the reference array, axis, expected result)
    cases = (
        ((2,), 0, np.full(2, 10)),
        ((2,), 1, 10),
        ((2, 3), 0, np.full(2, 10)),
        ((2, 3), 1, np.full(3, 10)),
        ((2, 3), 2, 10)
    )
    for shape, axis, expected in cases:
        actual = reshape_fns.broadcast_to_axis_of(10, np.empty(shape), axis)
        if isinstance(expected, np.ndarray):
            np.testing.assert_array_equal(actual, expected)
        else:
            # Axis beyond the array's dimensions: the value is expected back as is
            assert actual == expected
2111
2112
def test_unstack_to_array(self):
    """Check `unstack_to_array` on a three-level Series, with and without `levels`."""
    nan = np.nan
    level_values = [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']]
    sr = pd.Series([1, 2, 3, 4], index=pd.MultiIndex.from_arrays(level_values))

    # All levels: one axis per level
    expected_full = np.asarray([[
        [1., nan, nan, nan],
        [nan, 2., nan, nan]
    ], [
        [nan, nan, 3., nan],
        [nan, nan, nan, 4.]
    ]])
    np.testing.assert_array_equal(reshape_fns.unstack_to_array(sr), expected_full)

    # First level only
    np.testing.assert_array_equal(
        reshape_fns.unstack_to_array(sr, levels=(0,)),
        np.asarray([2., 4.])
    )

    # Third level first, then first level
    expected_pair = np.asarray([
        [1., nan],
        [2., nan],
        [nan, 3.],
        [nan, 4.],
    ])
    np.testing.assert_array_equal(
        reshape_fns.unstack_to_array(sr, levels=(2, 0)),
        expected_pair
    )
2138
2139
def test_make_symmetric(self):
    """Check `make_symmetric` for a Series, simple and MultiIndex frames, and sort=False."""
    nan = np.nan

    # sr2 and df2 hold the same values, so their expected matrices coincide
    one_column = np.array([
        [nan, 1.0, 2.0, 3.0],
        [1.0, nan, nan, nan],
        [2.0, nan, nan, nan],
        [3.0, nan, nan, nan]
    ])
    sr2_labels = pd.Index(['a2', 'x2', 'y2', 'z2'], dtype='object', name=('i2', None))
    pd.testing.assert_frame_equal(
        reshape_fns.make_symmetric(sr2),
        pd.DataFrame(one_column, index=sr2_labels, columns=sr2_labels)
    )
    df2_labels = pd.Index(['a4', 'x4', 'y4', 'z4'], dtype='object', name=('i4', 'c4'))
    pd.testing.assert_frame_equal(
        reshape_fns.make_symmetric(df2),
        pd.DataFrame(one_column, index=df2_labels, columns=df2_labels)
    )

    # MultiIndex on both axes
    df5_labels = pd.MultiIndex.from_tuples([
        ('a7', 'a8'),
        ('b7', 'b8'),
        ('c7', 'c8'),
        ('x7', 'x8'),
        ('y7', 'y8'),
        ('z7', 'z8')
    ], names=[('i7', 'c7'), ('i8', 'c8')])
    df5_values = np.array([
        [nan, nan, nan, 1.0, 4.0, 7.0],
        [nan, nan, nan, 2.0, 5.0, 8.0],
        [nan, nan, nan, 3.0, 6.0, 9.0],
        [1.0, 2.0, 3.0, nan, nan, nan],
        [4.0, 5.0, 6.0, nan, nan, nan],
        [7.0, 8.0, 9.0, nan, nan, nan]
    ])
    pd.testing.assert_frame_equal(
        reshape_fns.make_symmetric(df5),
        pd.DataFrame(df5_values, index=df5_labels, columns=df5_labels)
    )

    # Unsorted: the Series name is expected after the index labels
    mixed_labels = pd.Index([0, 1, 2, 'yo'], dtype='object')
    unsorted_values = np.array([
        [nan, nan, nan, 1.0],
        [nan, nan, nan, 2.0],
        [nan, nan, nan, 3.0],
        [1.0, 2.0, 3.0, nan]
    ])
    pd.testing.assert_frame_equal(
        reshape_fns.make_symmetric(pd.Series([1, 2, 3], name='yo'), sort=False),
        pd.DataFrame(unsorted_values, index=mixed_labels, columns=mixed_labels)
    )
2208
2209
def test_unstack_to_df(self):
    """Check `unstack_to_df` with default, explicit and symmetric level choices."""
    nan = np.nan

    # Two-level Series (first row of df5)
    pd.testing.assert_frame_equal(
        reshape_fns.unstack_to_df(df5.iloc[0]),
        pd.DataFrame(
            np.array([
                [1.0, nan, nan],
                [nan, 2.0, nan],
                [nan, nan, 3.0]
            ]),
            index=pd.Index(['a7', 'b7', 'c7'], dtype='object', name='c7'),
            columns=pd.Index(['a8', 'b8', 'c8'], dtype='object', name='c8')
        )
    )

    # Three-level Series
    level_values = [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']]
    sr = pd.Series([1, 2, 3, 4], index=pd.MultiIndex.from_arrays(level_values))

    expected_simple = pd.DataFrame(
        np.array([
            [1.0, 2.0],
            [3.0, 4.0]
        ]),
        index=pd.Index([1, 2], dtype='int64'),
        columns=pd.Index([3, 4], dtype='int64')
    )
    pd.testing.assert_frame_equal(
        reshape_fns.unstack_to_df(sr, index_levels=0, column_levels=1),
        expected_simple
    )

    expected_nested = pd.DataFrame(
        np.array([
            [1.0, nan, nan, nan],
            [nan, 2.0, nan, nan],
            [nan, nan, 3.0, nan],
            [nan, nan, nan, 4.0]
        ]),
        index=pd.MultiIndex.from_tuples([
            (1, 3),
            (1, 4),
            (2, 3),
            (2, 4)
        ]),
        columns=pd.Index(['a', 'b', 'c', 'd'], dtype='object')
    )
    pd.testing.assert_frame_equal(
        reshape_fns.unstack_to_df(sr, index_levels=(0, 1), column_levels=2),
        expected_nested
    )

    symmetric_labels = pd.Index([1, 2, 3, 4], dtype='int64')
    expected_symmetric = pd.DataFrame(
        np.array([
            [nan, nan, 1.0, 2.0],
            [nan, nan, 3.0, 4.0],
            [1.0, 3.0, nan, nan],
            [2.0, 4.0, nan, nan]
        ]),
        index=symmetric_labels,
        columns=symmetric_labels
    )
    pd.testing.assert_frame_equal(
        reshape_fns.unstack_to_df(sr, index_levels=0, column_levels=1, symmetric=True),
        expected_symmetric
    )
2266
2267
@pytest.mark.parametrize(
    "test_inputs",
    [
        (0, a1, a2, sr_none, sr1, sr2),
        (0, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4)
    ],
)
def test_flex(self, test_inputs):
    """Check that flexible selection on raw arguments matches full broadcasting.

    Every element of each fully broadcast argument must equal what
    `flex_select_nb` picks from the corresponding raw argument.
    """
    raw_args = reshape_fns.broadcast(*test_inputs, keep_raw=True)
    bc_args = reshape_fns.broadcast(*test_inputs, keep_raw=False)
    for pos, _ in enumerate(test_inputs):
        raw_arg = raw_args[pos]
        bc_arg = np.array(bc_args[pos])
        is_2d = bc_arg.ndim == 2
        bc_arg_2d = reshape_fns.to_2d(bc_arg)
        def_i, def_col = reshape_fns.flex_choose_i_and_col_nb(raw_arg, flex_2d=is_2d)
        n_rows, n_cols = bc_arg_2d.shape[0], bc_arg_2d.shape[1]
        for col in range(n_cols):
            for i in range(n_rows):
                picked = reshape_fns.flex_select_nb(raw_arg, i, col, def_i, def_col, is_2d)
                assert bc_arg_2d[i, col] == picked
2286
2287
2288
# ############# indexing.py ############# #
2289
2290
2291
# Filled by H.indexing_func: maps the `calling` keyword it received to True
called_dict = dict()

# Indexer classes under test; the param indexer is built for three parameter names
PandasIndexer = indexing.PandasIndexer
ParamIndexer = indexing.build_param_indexer(['param1', 'param2', 'tuple'])
2295
2296
2297
class H(PandasIndexer, ParamIndexer):
    """Minimal indexable object used to exercise `PandasIndexer` and `ParamIndexer`.

    Holds a pandas object `a` and three mapper Series (first parameter,
    second parameter, and the tuple of both) that are indexed by the
    columns of `a`.
    """

    def __init__(self, a, param1_mapper, param2_mapper, tuple_mapper, level_names):
        """Store the data and mappers, then initialize both indexer bases."""
        self.a = a

        self._param1_mapper = param1_mapper
        self._param2_mapper = param2_mapper
        self._tuple_mapper = tuple_mapper
        self._level_names = level_names

        # The `calling` keyword is what `indexing_func` records in `called_dict`,
        # so the tests can tell which indexer triggered the call
        PandasIndexer.__init__(self, calling='PandasIndexer')
        ParamIndexer.__init__(
            self,
            [param1_mapper, param2_mapper, tuple_mapper],
            level_names=[level_names[0], level_names[1], level_names],
            calling='ParamIndexer'
        )

    def indexing_func(self, pd_indexing_func, calling=None):
        """Apply `pd_indexing_func` to the data and every mapper; return a new `H`.

        Also records `calling` in the module-level `called_dict`.
        """
        # As soon as you call iloc etc., performs it on each dataframe and mapper and returns a new class instance
        called_dict[calling] = True
        new_mappers = [
            indexing.indexing_on_mapper(mapper, self.a, pd_indexing_func)
            for mapper in (self._param1_mapper, self._param2_mapper, self._tuple_mapper)
        ]
        return H(pd_indexing_func(self.a), *new_mappers, self._level_names)

    @classmethod
    def run(cls, a, params1, params2, level_names=('p1', 'p2')):
        """Build an `H` holding one copy of `a` per parameter combination.

        Args:
            a: Data to tile; converted with `reshape_fns.to_2d`.
            params1: Values of the first parameter, one per combination.
            params2: Values of the second parameter, one per combination.
            level_names: Names of the two parameter levels in the new columns.

        Returns:
            Instance of `cls` whose columns combine the parameter levels with
            the columns of `a`.
        """
        a = reshape_fns.to_2d(a)
        # Build column hierarchy
        params1_idx = pd.Index(params1, name=level_names[0])
        params2_idx = pd.Index(params2, name=level_names[1])
        params_idx = index_fns.stack_indexes([params1_idx, params2_idx])
        new_columns = index_fns.combine_indexes([params_idx, a.columns])

        # Build mappers: each parameter value is repeated once per column of a
        n_columns = len(a.columns)
        param1_mapper = pd.Series(np.repeat(params1, n_columns), index=new_columns)
        param2_mapper = pd.Series(np.repeat(params2, n_columns), index=new_columns)
        tuple_mapper = pd.Series(
            list(zip(param1_mapper.values, param2_mapper.values)),
            index=new_columns
        )

        # Tile a once per parameter combination to match the length of new_columns
        # (previously hard-coded to 4, which only fit four-element parameter lists)
        tiled = reshape_fns.tile(a.values, len(params1_idx), axis=1)
        a = array_wrapper.ArrayWrapper(a.index, new_columns, 2).wrap(tiled)
        return cls(a, param1_mapper, param2_mapper, tuple_mapper, level_names)
2344
2345
2346
# Simulate an indicator with two params
h = H.run(df4, params1=[0.1, 0.1, 0.2, 0.2], params2=[0.3, 0.4, 0.5, 0.6])
2348
2349
2350
class TestIndexing:
    """Tests for pandas-style and parameter-based indexing on the module-level `h`."""

    def test_kwargs(self):
        """Check that each indexer passes its own `calling` keyword to `indexing_func`."""
        # `called_dict` is module-global and filled by every indexing operation on `h`;
        # start from a clean slate so the assertions cannot pass on stale entries
        called_dict.clear()
        _ = h[(0.1, 0.3, 'a6')]
        assert called_dict['PandasIndexer']
        _ = h.param1_loc[0.1]
        assert called_dict['ParamIndexer']

    def test_pandas_indexing(self):
        """Check `__getitem__`, `loc`, `iloc` and `xs` on the wrapped frame."""
        rows = pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')
        base_values = [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9]
        ]
        first_combo = (0.1, 0.3)

        # __getitem__
        pd.testing.assert_series_equal(
            h[(*first_combo, 'a6')].a,
            pd.Series(np.array([1, 4, 7]), index=rows, name=(0.1, 0.3, 'a6'))
        )
        # loc
        pd.testing.assert_frame_equal(
            h.loc[:, (0.1, 0.3, 'a6'):(0.1, 0.3, 'c6')].a,
            pd.DataFrame(
                np.array(base_values),
                index=rows,
                columns=pd.MultiIndex.from_tuples([
                    (0.1, 0.3, 'a6'),
                    (0.1, 0.3, 'b6'),
                    (0.1, 0.3, 'c6')
                ], names=['p1', 'p2', 'c6'])
            )
        )
        # iloc
        pd.testing.assert_frame_equal(
            h.iloc[-2:, -2:].a,
            pd.DataFrame(
                np.array([
                    [5, 6],
                    [8, 9]
                ]),
                index=pd.Index(['y6', 'z6'], dtype='object', name='i6'),
                columns=pd.MultiIndex.from_tuples([
                    (0.2, 0.6, 'b6'),
                    (0.2, 0.6, 'c6')
                ], names=['p1', 'p2', 'c6'])
            )
        )
        # xs
        pd.testing.assert_frame_equal(
            h.xs(first_combo, level=('p1', 'p2'), axis=1).a,
            pd.DataFrame(
                np.array(base_values),
                index=rows,
                columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6')
            )
        )

    def test_param_indexing(self):
        """Check `param1_loc`, `param2_loc` and `tuple_loc` selections."""
        rows = pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')
        base_values = [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9]
        ]
        doubled_values = [
            [1, 2, 3, 1, 2, 3],
            [4, 5, 6, 4, 5, 6],
            [7, 8, 9, 7, 8, 9]
        ]
        first_combo_columns = [
            (0.1, 0.3, 'a6'),
            (0.1, 0.3, 'b6'),
            (0.1, 0.3, 'c6')
        ]
        all_level_names = ['p1', 'p2', 'c6']

        # param1: the selected level is expected to be dropped
        pd.testing.assert_frame_equal(
            h.param1_loc[0.1].a,
            pd.DataFrame(
                np.array(doubled_values),
                index=rows,
                columns=pd.MultiIndex.from_tuples([
                    (0.3, 'a6'),
                    (0.3, 'b6'),
                    (0.3, 'c6'),
                    (0.4, 'a6'),
                    (0.4, 'b6'),
                    (0.4, 'c6')
                ], names=['p2', 'c6'])
            )
        )
        # param2
        pd.testing.assert_frame_equal(
            h.param2_loc[0.3].a,
            pd.DataFrame(
                np.array(base_values),
                index=rows,
                columns=pd.MultiIndex.from_tuples([
                    (0.1, 'a6'),
                    (0.1, 'b6'),
                    (0.1, 'c6')
                ], names=['p1', 'c6'])
            )
        )
        # tuple: a single key is expected to drop both parameter levels
        pd.testing.assert_frame_equal(
            h.tuple_loc[(0.1, 0.3)].a,
            pd.DataFrame(
                np.array(base_values),
                index=rows,
                columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6')
            )
        )
        # tuple: a slice is expected to keep all levels
        pd.testing.assert_frame_equal(
            h.tuple_loc[(0.1, 0.3):(0.1, 0.3)].a,
            pd.DataFrame(
                np.array(base_values),
                index=rows,
                columns=pd.MultiIndex.from_tuples(first_combo_columns, names=all_level_names)
            )
        )
        # tuple: a list with a repeated key is expected to select the columns twice
        pd.testing.assert_frame_equal(
            h.tuple_loc[[(0.1, 0.3), (0.1, 0.3)]].a,
            pd.DataFrame(
                np.array(doubled_values),
                index=rows,
                columns=pd.MultiIndex.from_tuples(
                    first_combo_columns + first_combo_columns,
                    names=all_level_names
                )
            )
        )
2499
2500
2501
# ############# combine_fns.py ############# #
2502
2503
class TestCombineFns:
    """Tests for the Python and Numba variants of the functions in `combine_fns`.

    Every case is run on 1d input (`sr2.values`) and 2d input (`df4.values`).
    """

    def test_apply_and_concat_one(self):
        """Check `apply_and_concat_one` and its `_nb` twin."""
        def apply_func(i, x, a):
            return x + a[i]

        @njit
        def apply_func_nb(i, x, a):
            return x + a[i]

        cases = (
            # 1d
            (sr2.values, np.array([
                [11, 21, 31],
                [12, 22, 32],
                [13, 23, 33]
            ])),
            # 2d
            (df4.values, np.array([
                [11, 12, 13, 21, 22, 23, 31, 32, 33],
                [14, 15, 16, 24, 25, 26, 34, 35, 36],
                [17, 18, 19, 27, 28, 29, 37, 38, 39]
            ]))
        )
        for values, expected in cases:
            np.testing.assert_array_equal(
                combine_fns.apply_and_concat_one(3, apply_func, values, [10, 20, 30]),
                expected
            )
            np.testing.assert_array_equal(
                combine_fns.apply_and_concat_one_nb(3, apply_func_nb, values, (10, 20, 30)),
                expected
            )

    def test_apply_and_concat_multiple(self):
        """Check `apply_and_concat_multiple` and its `_nb` twin (two outputs)."""
        def apply_func(i, x, a):
            return (x, x + a[i])

        @njit
        def apply_func_nb(i, x, a):
            return (x, x + a[i])

        cases = (
            # 1d
            (
                sr2.values,
                np.array([
                    [1, 1, 1],
                    [2, 2, 2],
                    [3, 3, 3]
                ]),
                np.array([
                    [11, 21, 31],
                    [12, 22, 32],
                    [13, 23, 33]
                ])
            ),
            # 2d
            (
                df4.values,
                np.array([
                    [1, 2, 3, 1, 2, 3, 1, 2, 3],
                    [4, 5, 6, 4, 5, 6, 4, 5, 6],
                    [7, 8, 9, 7, 8, 9, 7, 8, 9]
                ]),
                np.array([
                    [11, 12, 13, 21, 22, 23, 31, 32, 33],
                    [14, 15, 16, 24, 25, 26, 34, 35, 36],
                    [17, 18, 19, 27, 28, 29, 37, 38, 39]
                ])
            )
        )
        for values, expected_first, expected_second in cases:
            first, second = combine_fns.apply_and_concat_multiple(
                3, apply_func, values, [10, 20, 30])
            np.testing.assert_array_equal(first, expected_first)
            np.testing.assert_array_equal(second, expected_second)
            first, second = combine_fns.apply_and_concat_multiple_nb(
                3, apply_func_nb, values, (10, 20, 30))
            np.testing.assert_array_equal(first, expected_first)
            np.testing.assert_array_equal(second, expected_second)

    def test_combine_and_concat(self):
        """Check `combine_and_concat` and its `_nb` twin."""
        def combine_func(x, y, a):
            return x + y + a

        @njit
        def combine_func_nb(x, y, a):
            return x + y + a

        cases = (
            # 1d
            (sr2.values, np.array([
                [103, 104],
                [106, 108],
                [109, 112]
            ])),
            # 2d
            (df4.values, np.array([
                [103, 106, 109, 104, 108, 112],
                [112, 115, 118, 116, 120, 124],
                [121, 124, 127, 128, 132, 136]
            ]))
        )
        for values, expected in cases:
            np.testing.assert_array_equal(
                combine_fns.combine_and_concat(
                    values, (values * 2, values * 3), combine_func, 100),
                expected
            )
            np.testing.assert_array_equal(
                combine_fns.combine_and_concat_nb(
                    values, (values * 2, values * 3), combine_func_nb, 100),
                expected
            )

    def test_combine_multiple(self):
        """Check `combine_multiple` and its `_nb` twin."""
        def combine_func(x, y, a):
            return x + y + a

        @njit
        def combine_func_nb(x, y, a):
            return x + y + a

        cases = (
            # 1d
            (sr2.values, np.array([206, 212, 218])),
            # 2d
            (df4.values, np.array([
                [206, 212, 218],
                [224, 230, 236],
                [242, 248, 254]
            ]))
        )
        for values, expected in cases:
            np.testing.assert_array_equal(
                combine_fns.combine_multiple(
                    (values, values * 2, values * 3), combine_func, 100),
                expected
            )
            np.testing.assert_array_equal(
                combine_fns.combine_multiple_nb(
                    (values, values * 2, values * 3), combine_func_nb, 100),
                expected
            )
2661
2662
2663
# ############# accessors.py ############# #
2664
2665
class TestAccessors:
2666
def test_indexing(self):
    """Indexing the accessor by column equals the accessor of the indexed column."""
    via_accessor = df4.vbt['a6'].obj
    via_pandas = df4['a6'].vbt.obj
    pd.testing.assert_series_equal(via_accessor, via_pandas)
2668
2669
def test_freq(self):
    """Check `wrapper.freq` for a daily datetime index and for a default index."""

    def plain_series():
        # A fresh series without a datetime index for every assertion.
        return pd.Series([1, 2, 3])

    daily_index = pd.DatetimeIndex([datetime(2018, 1, day) for day in (1, 2, 3)])
    ts = pd.Series([1, 2, 3], index=daily_index)

    # Frequency taken from the index, then overridden explicitly.
    assert ts.vbt.wrapper.freq == day_dt
    assert ts.vbt(freq='2D').wrapper.freq == day_dt * 2

    # No datetime index: frequency is None unless passed explicitly.
    assert plain_series().vbt.wrapper.freq is None
    assert plain_series().vbt(freq='3D').wrapper.freq == day_dt * 3
    four_days = np.timedelta64(4, 'D')
    assert plain_series().vbt(freq=four_days).wrapper.freq == day_dt * 4
2680
2681
def test_props(self):
    """Check the `is_series` / `is_frame` predicates of the accessor."""
    series_accessor = sr1.vbt
    assert series_accessor.is_series()
    assert not series_accessor.is_frame()

    assert not df1.vbt.is_series()
    assert df2.vbt.is_frame()
2686
2687
def test_wrapper(self):
    """Check wrapper metadata and `wrap()` for series and frame accessors."""
    # Metadata exposed by a series wrapper.
    sr_wrapper = sr2.vbt.wrapper
    pd.testing.assert_index_equal(sr_wrapper.index, sr2.index)
    pd.testing.assert_index_equal(sr_wrapper.columns, sr2.to_frame().columns)
    assert sr_wrapper.ndim == sr2.ndim
    assert sr_wrapper.name == sr2.name
    assert pd.Series([1, 2, 3]).vbt.wrapper.name is None
    assert sr_wrapper.shape == sr2.shape
    assert sr_wrapper.shape_2d == (sr2.shape[0], 1)

    # Metadata exposed by a frame wrapper.
    df_wrapper = df4.vbt.wrapper
    pd.testing.assert_index_equal(df_wrapper.index, df4.index)
    pd.testing.assert_index_equal(df_wrapper.columns, df4.columns)
    assert df_wrapper.ndim == df4.ndim
    assert df_wrapper.name is None
    assert df_wrapper.shape == df4.shape
    assert df_wrapper.shape_2d == df4.shape

    # Wrapping with the series wrapper.
    pd.testing.assert_series_equal(sr_wrapper.wrap(a2), sr2)
    pd.testing.assert_series_equal(sr_wrapper.wrap(df2), sr2)
    wrapped = sr_wrapper.wrap(df2.values, index=df2.index, columns=df2.columns)
    expected = pd.Series(df2.values[:, 0], index=df2.index, name=df2.columns[0])
    pd.testing.assert_series_equal(wrapped, expected)
    wrapped = sr_wrapper.wrap(df4.values, columns=df4.columns)
    expected = pd.DataFrame(df4.values, index=sr2.index, columns=df4.columns)
    pd.testing.assert_frame_equal(wrapped, expected)

    # Wrapping with a one-column frame wrapper.
    df2_wrapper = df2.vbt.wrapper
    pd.testing.assert_frame_equal(df2_wrapper.wrap(a2), df2)
    pd.testing.assert_frame_equal(df2_wrapper.wrap(sr2), df2)
    wrapped = df2_wrapper.wrap(df4.values, columns=df4.columns)
    expected = pd.DataFrame(df4.values, index=df2.index, columns=df4.columns)
    pd.testing.assert_frame_equal(wrapped, expected)
2717
2718
def test_empty(self):
    """Check `empty` and `empty_like` with a fill value for series and frames."""
    # empty(): explicit shape, labels and fill value.
    created = pd.Series.vbt.empty(5, index=np.arange(10, 15), name='a', fill_value=5)
    expected = pd.Series(np.full(5, 5), index=np.arange(10, 15), name='a')
    pd.testing.assert_series_equal(created, expected)

    created = pd.DataFrame.vbt.empty(
        (5, 3), index=np.arange(10, 15), columns=['a', 'b', 'c'], fill_value=5)
    expected = pd.DataFrame(
        np.full((5, 3), 5), index=np.arange(10, 15), columns=['a', 'b', 'c'])
    pd.testing.assert_frame_equal(created, expected)

    # empty_like(): shape and labels are taken from the template object.
    created = pd.Series.vbt.empty_like(sr2, fill_value=5)
    expected = pd.Series(np.full(sr2.shape, 5), index=sr2.index, name=sr2.name)
    pd.testing.assert_series_equal(created, expected)

    created = pd.DataFrame.vbt.empty_like(df4, fill_value=5)
    expected = pd.DataFrame(np.full(df4.shape, 5), index=df4.index, columns=df4.columns)
    pd.testing.assert_frame_equal(created, expected)
2735
2736
def test_apply_func_on_index(self):
    """Check `apply_on_index` on both axes, returning a copy and in place."""

    def add_suffix(idx):
        return idx + '_yo'

    def expected_frame(row_label, col_label):
        # Single-cell frame labelled like `df1` but with the given labels.
        return pd.DataFrame(
            np.asarray([1]),
            index=pd.Index([row_label], dtype='object', name='i3'),
            columns=pd.Index([col_label], dtype='object', name='c3')
        )

    # Returning a new object.
    pd.testing.assert_frame_equal(
        df1.vbt.apply_on_index(add_suffix, axis=0),
        expected_frame('x3_yo', 'a3')
    )
    pd.testing.assert_frame_equal(
        df1.vbt.apply_on_index(add_suffix, axis=1),
        expected_frame('x3', 'a3_yo')
    )

    # Modifying a copy in place so that the shared fixture stays untouched.
    rows_renamed = df1.copy()
    rows_renamed.vbt.apply_on_index(add_suffix, axis=0, inplace=True)
    pd.testing.assert_frame_equal(rows_renamed, expected_frame('x3_yo', 'a3'))

    cols_renamed = df1.copy()
    cols_renamed.vbt.apply_on_index(add_suffix, axis=1, inplace=True)
    pd.testing.assert_frame_equal(cols_renamed, expected_frame('x3', 'a3_yo'))
2773
2774
def test_stack_index(self):
    """Check `stack_index` with the new level placed on top and at the bottom."""
    new_level = [1, 2, 3]

    # New, unnamed level becomes the outermost column level.
    on_top_columns = pd.MultiIndex.from_tuples([
        (1, 'a7', 'a8'),
        (2, 'b7', 'b8'),
        (3, 'c7', 'c8')
    ], names=[None, 'c7', 'c8'])
    pd.testing.assert_frame_equal(
        df5.vbt.stack_index(new_level, on_top=True),
        pd.DataFrame(df5.values, index=df5.index, columns=on_top_columns)
    )

    # New, unnamed level becomes the innermost column level.
    at_bottom_columns = pd.MultiIndex.from_tuples([
        ('a7', 'a8', 1),
        ('b7', 'b8', 2),
        ('c7', 'c8', 3)
    ], names=['c7', 'c8', None])
    pd.testing.assert_frame_equal(
        df5.vbt.stack_index(new_level, on_top=False),
        pd.DataFrame(df5.values, index=df5.index, columns=at_bottom_columns)
    )
2799
2800
def test_drop_levels(self):
    """Dropping column level 'c7' leaves a flat 'c8' column index."""
    remaining_columns = pd.Index(['a8', 'b8', 'c8'], dtype='object', name='c8')
    expected = pd.DataFrame(df5.values, index=df5.index, columns=remaining_columns)
    pd.testing.assert_frame_equal(df5.vbt.drop_levels('c7'), expected)
2809
2810
def test_rename_levels(self):
    """Renaming column level 'c8' to 'c9' changes only the level name."""
    renamed_columns = pd.MultiIndex.from_tuples([
        ('a7', 'a8'),
        ('b7', 'b8'),
        ('c7', 'c8')
    ], names=['c7', 'c9'])
    expected = pd.DataFrame(df5.values, index=df5.index, columns=renamed_columns)
    pd.testing.assert_frame_equal(df5.vbt.rename_levels({'c8': 'c9'}), expected)
2823
2824
def test_select_levels(self):
    """Selecting column level 'c8' yields a flat 'c8' column index."""
    selected_columns = pd.Index(['a8', 'b8', 'c8'], dtype='object', name='c8')
    expected = pd.DataFrame(df5.values, index=df5.index, columns=selected_columns)
    pd.testing.assert_frame_equal(df5.vbt.select_levels('c8'), expected)
2833
2834
def test_drop_redundant_levels(self):
    """Stacking a 0..2 range level and dropping redundant levels restores `df5`."""
    range_level = pd.RangeIndex(start=0, step=1, stop=3)
    stacked = df5.vbt.stack_index(range_level)
    pd.testing.assert_frame_equal(stacked.vbt.drop_redundant_levels(), df5)
2839
2840
def test_drop_duplicate_levels(self):
    """Stacking a copy of the first column level and de-duplicating restores `df5`."""
    first_level = df5.columns.get_level_values(0)
    stacked = df5.vbt.stack_index(first_level)
    pd.testing.assert_frame_equal(stacked.vbt.drop_duplicate_levels(), df5)
2845
2846
def test_to_array(self):
    """Check `to_1d_array` / `to_2d_array` for a series and a one-column frame."""
    # (actual, expected) pairs, evaluated in the original order.
    checks = [
        (sr2.vbt.to_1d_array(), sr2.values),
        (sr2.vbt.to_2d_array(), sr2.to_frame().values),
        (df2.vbt.to_1d_array(), df2.iloc[:, 0].values),
        (df2.vbt.to_2d_array(), df2.values),
    ]
    for actual, expected in checks:
        np.testing.assert_array_equal(actual, expected)
2851
2852
def test_tile(self):
    """Check `tile` along rows and columns, with keys as the outer level."""
    tile_keys = ['a', 'b']

    # axis=0: the whole block of rows is repeated once per key.
    expected_rows = pd.DataFrame(
        np.asarray([
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9]
        ]),
        index=pd.MultiIndex.from_tuples(
            [(key, row) for key in ('a', 'b') for row in ('x6', 'y6', 'z6')],
            names=[None, 'i6']
        ),
        columns=df4.columns
    )
    pd.testing.assert_frame_equal(
        df4.vbt.tile(2, keys=tile_keys, axis=0),
        expected_rows
    )

    # axis=1: the whole block of columns is repeated once per key.
    expected_cols = pd.DataFrame(
        np.asarray([
            [1, 2, 3, 1, 2, 3],
            [4, 5, 6, 4, 5, 6],
            [7, 8, 9, 7, 8, 9]
        ]),
        index=df4.index,
        columns=pd.MultiIndex.from_tuples(
            [(key, col) for key in ('a', 'b') for col in ('a6', 'b6', 'c6')],
            names=[None, 'c6']
        )
    )
    pd.testing.assert_frame_equal(
        df4.vbt.tile(2, keys=tile_keys, axis=1),
        expected_cols
    )
2894
2895
def test_repeat(self):
    """Check `repeat` along rows and columns, with keys as the inner level."""
    repeat_keys = ['a', 'b']

    # axis=0: every row is repeated once per key.
    expected_rows = pd.DataFrame(
        np.asarray([
            [1, 2, 3],
            [1, 2, 3],
            [4, 5, 6],
            [4, 5, 6],
            [7, 8, 9],
            [7, 8, 9]
        ]),
        index=pd.MultiIndex.from_tuples(
            [(row, key) for row in ('x6', 'y6', 'z6') for key in ('a', 'b')],
            names=['i6', None]
        ),
        columns=df4.columns
    )
    pd.testing.assert_frame_equal(
        df4.vbt.repeat(2, keys=repeat_keys, axis=0),
        expected_rows
    )

    # axis=1: every column is repeated once per key.
    expected_cols = pd.DataFrame(
        np.asarray([
            [1, 1, 2, 2, 3, 3],
            [4, 4, 5, 5, 6, 6],
            [7, 7, 8, 8, 9, 9]
        ]),
        index=df4.index,
        columns=pd.MultiIndex.from_tuples(
            [(col, key) for col in ('a6', 'b6', 'c6') for key in ('a', 'b')],
            names=['c6', None]
        )
    )
    pd.testing.assert_frame_equal(
        df4.vbt.repeat(2, keys=repeat_keys, axis=1),
        expected_cols
    )
2937
2938
def test_align_to(self):
    """Aligning a frame keyed by 'c8' to a frame keyed by ('c7', 'c8')."""
    narrow_columns = pd.MultiIndex.from_arrays([['a8', 'b8']], names=['c8'])
    wide_columns = pd.MultiIndex.from_arrays(
        [['a7', 'a7', 'c7', 'c7'], ['a8', 'b8', 'a8', 'b8']],
        names=['c7', 'c8']
    )
    narrow = pd.DataFrame([[1, 2], [4, 5], [7, 8]], columns=narrow_columns)
    wide = pd.DataFrame(
        [[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]],
        columns=wide_columns
    )

    # The two source columns are expected once per 'c7' value of the target.
    expected = pd.DataFrame(
        np.asarray([
            [1, 2, 1, 2],
            [4, 5, 4, 5],
            [7, 8, 7, 8]
        ]),
        index=pd.RangeIndex(start=0, stop=3, step=1),
        columns=wide_columns
    )
    pd.testing.assert_frame_equal(narrow.vbt.align_to(wide), expected)
2955
2956
def test_broadcast(self):
    """Broadcast a scalar against `sr2` via the class-level and instance-level API."""
    expected_scalar = pd.Series(np.full(sr2.shape, 10), index=sr2.index, name=sr2.name)

    # Class-level call with both operands passed explicitly.
    left, right = pd.Series.vbt.broadcast(sr2, 10)
    pd.testing.assert_series_equal(left, sr2)
    pd.testing.assert_series_equal(right, expected_scalar)

    # Instance-level call where `sr2` is the implicit first operand.
    left, right = sr2.vbt.broadcast(10)
    pd.testing.assert_series_equal(left, sr2)
    pd.testing.assert_series_equal(right, expected_scalar)
2964
2965
def test_broadcast_to(self):
    """`broadcast_to` accepts either a pandas object or its accessor as target."""
    for target in (df2, df2.vbt):
        pd.testing.assert_frame_equal(sr2.vbt.broadcast_to(target), df2)
2968
2969
def test_apply(self):
    """Check `apply` with a squaring function on a series and a frame."""

    def square(x):
        return x ** 2

    squared_series = sr2 ** 2
    pd.testing.assert_series_equal(sr2.vbt.apply(apply_func=square), squared_series)
    pd.testing.assert_series_equal(
        sr2.vbt.apply(apply_func=square, to_2d=True),
        squared_series
    )
    pd.testing.assert_frame_equal(df4.vbt.apply(apply_func=square), df4 ** 2)
2973
2974
def test_concat(self):
    """Check `concat` for two plain series and for series + scalar + frame."""
    # Two unnamed series end up under the column labels 0 and 1.
    pd.testing.assert_frame_equal(
        pd.DataFrame.vbt.concat(pd.Series([1, 2, 3]), pd.Series([1, 2, 3])),
        pd.DataFrame({0: pd.Series([1, 2, 3]), 1: pd.Series([1, 2, 3])})
    )

    # Series, scalar and frame under the keys 'a', 'b' and 'c'.
    concat_keys = ['a', 'b', 'c']
    expected_index = pd.MultiIndex.from_tuples([
        ('x2', 'x6'),
        ('y2', 'y6'),
        ('z2', 'z6')
    ], names=['i2', 'i6'])
    expected_columns = pd.MultiIndex.from_tuples(
        [(key, col) for key in ('a', 'b', 'c') for col in ('a6', 'b6', 'c6')],
        names=[None, 'c6']
    )
    target = pd.DataFrame(
        np.array([
            [1, 1, 1, 10, 10, 10, 1, 2, 3],
            [2, 2, 2, 10, 10, 10, 4, 5, 6],
            [3, 3, 3, 10, 10, 10, 7, 8, 9]
        ]),
        index=expected_index,
        columns=expected_columns
    )
    pd.testing.assert_frame_equal(
        pd.DataFrame.vbt.concat(sr2, 10, df4, keys=concat_keys),
        target
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.concat(10, df4, keys=concat_keys),
        target
    )
3010
3011
def test_apply_and_concat(self):
    """Check `apply_and_concat` on a series and on a one-column frame.

    Exercises the plain-Python path, the `numba_loop=True` path and, when
    `ray` could be imported, the `use_ray=True` path.
    """

    def apply_func(i, x, y, c, d=1):
        return x + y[i] + c + d

    @njit
    def apply_func_nb(i, x, y, c, d):
        return x + y[i] + c + d

    def apply_func2(i, x, y, c, d=1):
        return x + y + c + d

    offsets = np.array([1, 2, 3])
    abc_keys = ['a', 'b', 'c']

    # ---- series input ----
    target = pd.DataFrame(
        np.array([
            [112, 113, 114],
            [113, 114, 115],
            [114, 115, 116]
        ]),
        index=pd.Index(['x2', 'y2', 'z2'], dtype='object', name='i2'),
        columns=pd.Index(['a', 'b', 'c'], dtype='object')
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.apply_and_concat(
            3, offsets, 10, apply_func=apply_func, d=100, keys=abc_keys),
        target
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.apply_and_concat(
            3, offsets, 10, 100, apply_func=apply_func_nb, numba_loop=True,
            keys=abc_keys),
        target
    )
    if ray_available:
        # Requesting the Numba loop together with Ray is expected to raise.
        with pytest.raises(Exception):
            sr2.vbt.apply_and_concat(
                3, offsets, 10, 100, apply_func=apply_func_nb, numba_loop=True,
                use_ray=True, keys=abc_keys)
        pd.testing.assert_frame_equal(
            sr2.vbt.apply_and_concat(
                3, offsets, 10, apply_func=apply_func, d=100,
                keys=abc_keys, use_ray=True),
            target
        )

    # Without keys the columns are labelled 0..2 under the name 'apply_idx'.
    pd.testing.assert_frame_equal(
        sr2.vbt.apply_and_concat(3, offsets, 10, apply_func=apply_func, d=100),
        pd.DataFrame(
            target.values,
            index=target.index,
            columns=pd.Index([0, 1, 2], dtype='int64', name='apply_idx')
        )
    )

    # Column-vector argument that is not indexed by `i`.
    pd.testing.assert_frame_equal(
        sr2.vbt.apply_and_concat(
            3, np.array([[1], [2], [3]]), 10, apply_func=apply_func2, d=100,
            keys=abc_keys,
            to_2d=True  # otherwise (3, 1) + (1, 3) = (3, 3) != (3, 1) -> error
        ),
        pd.DataFrame(
            np.array([
                [112, 112, 112],
                [114, 114, 114],
                [116, 116, 116]
            ]),
            index=target.index,
            columns=target.columns
        )
    )

    # ---- one-column frame input ----
    target2 = pd.DataFrame(
        np.array([
            [112, 113, 114],
            [113, 114, 115],
            [114, 115, 116]
        ]),
        index=pd.Index(['x4', 'y4', 'z4'], dtype='object', name='i4'),
        columns=pd.MultiIndex.from_tuples([
            ('a', 'a4'),
            ('b', 'a4'),
            ('c', 'a4')
        ], names=[None, 'c4'])
    )
    pd.testing.assert_frame_equal(
        df2.vbt.apply_and_concat(
            3, offsets, 10, apply_func=apply_func, d=100, keys=abc_keys),
        target2
    )
    pd.testing.assert_frame_equal(
        df2.vbt.apply_and_concat(
            3, offsets, 10, 100, apply_func=apply_func_nb, numba_loop=True,
            keys=abc_keys),
        target2
    )
    if ray_available:
        pd.testing.assert_frame_equal(
            df2.vbt.apply_and_concat(
                3, offsets, 10, apply_func=apply_func, d=100,
                keys=abc_keys, use_ray=True),
            target2
        )
3120
3121
def test_combine(self):
    """Check `combine` with one operand, several operands and `concat=True`.

    Exercises the plain-Python path, the `numba_loop=True` path and, when
    `ray` could be imported, the `use_ray=True` path.
    """

    def combine_func(x, y, a, b=1):
        return x + y + a + b

    @njit
    def combine_func_nb(x, y, a, b):
        return x + y + a + b

    @njit
    def combine_func2_nb(x, y):
        return x + y + np.array([[1], [2], [3]])

    @njit
    def combine_func3_nb(x, y):
        return x + y

    # Labels shared by several expected objects below.
    i2_index = pd.Index(['x2', 'y2', 'z2'], dtype='object', name='i2')
    c6_columns = pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6')
    i2_i6_index = pd.MultiIndex.from_tuples([
        ('x2', 'x6'),
        ('y2', 'y6'),
        ('z2', 'z6')
    ], names=['i2', 'i6'])
    i6_i2_index = pd.MultiIndex.from_tuples([
        ('x6', 'x2'),
        ('y6', 'y2'),
        ('z6', 'z2')
    ], names=['i6', 'i2'])

    # ---- single operand: series with a scalar ----
    expected_series = pd.Series(
        np.array([1111, 1112, 1113]),
        index=i2_index,
        name=sr2.name
    )
    pd.testing.assert_series_equal(
        sr2.vbt.combine(10, 100, b=1000, combine_func=combine_func),
        expected_series
    )
    pd.testing.assert_series_equal(
        sr2.vbt.combine(10, 100, 1000, combine_func=combine_func_nb),
        expected_series
    )
    pd.testing.assert_series_equal(
        sr2.vbt.combine(10, combine_func=combine_func2_nb, to_2d=True),
        pd.Series(
            np.array([12, 14, 16]),
            index=i2_index,
            name='a2'
        )
    )

    # ---- single operand: frame with a series ----
    pd.testing.assert_frame_equal(
        df4.vbt.combine(sr2, combine_func=combine_func3_nb),
        pd.DataFrame(
            np.array([
                [2, 3, 4],
                [6, 7, 8],
                [10, 11, 12]
            ]),
            index=i6_i2_index,
            columns=c6_columns
        )
    )

    # ---- several operands, combined into one object ----
    target = pd.DataFrame(
        np.array([
            [232, 233, 234],
            [236, 237, 238],
            [240, 241, 242]
        ]),
        index=i2_i6_index,
        columns=c6_columns
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.combine([10, df4], 10, b=100, combine_func=combine_func),
        target
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.combine(
            [10, df4], 10, 100, combine_func=combine_func_nb, numba_loop=True),
        target
    )
    if ray_available:
        # Requesting the Numba loop together with Ray is expected to raise.
        with pytest.raises(Exception):
            sr2.vbt.combine(
                [10, df4], 10, 100,
                combine_func=combine_func_nb, numba_loop=True, use_ray=True)
    pd.testing.assert_frame_equal(
        df4.vbt.combine([10, sr2], 10, b=100, combine_func=combine_func),
        pd.DataFrame(
            target.values,
            index=i6_i2_index,
            columns=target.columns
        )
    )

    # ---- several operands, one column group per operand ----
    target2 = pd.DataFrame(
        np.array([
            [121, 121, 121, 112, 113, 114],
            [122, 122, 122, 116, 117, 118],
            [123, 123, 123, 120, 121, 122]
        ]),
        index=i2_i6_index,
        columns=pd.MultiIndex.from_tuples(
            [(pos, col) for pos in (0, 1) for col in ('a6', 'b6', 'c6')],
            names=['combine_idx', 'c6']
        )
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.combine(
            [10, df4], 10, b=100, combine_func=combine_func, concat=True),
        target2
    )
    pd.testing.assert_frame_equal(
        sr2.vbt.combine(
            [10, df4], 10, 100,
            combine_func=combine_func_nb, numba_loop=True, concat=True),
        target2
    )
    if ray_available:
        pd.testing.assert_frame_equal(
            sr2.vbt.combine(
                [10, df4], 10, b=100,
                combine_func=combine_func, concat=True, use_ray=True),
            target2
        )

    # Explicit keys replace the default 'combine_idx' level.
    pd.testing.assert_frame_equal(
        sr2.vbt.combine(
            [10, df4], 10, b=100,
            combine_func=lambda x, y, a, b=1: x + y + a + b,
            concat=True,
            keys=['a', 'b']
        ),
        pd.DataFrame(
            target2.values,
            index=target2.index,
            columns=pd.MultiIndex.from_tuples(
                [(key, col) for key in ('a', 'b') for col in ('a6', 'b6', 'c6')],
                names=[None, 'c6']
            )
        )
    )
3294
3295