Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/test/test_intrin_utils.hpp
16337 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
4
5
// This file is not standalone.
6
// It is included with these active namespaces:
7
//namespace opencv_test { namespace hal { namespace intrinXXX {
8
//CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
9
10
// Entry points of the intrinsic tests, one per lane type named in the
// function. All of them take no arguments and return nothing.

// 8-bit lanes
void test_hal_intrin_uint8();
void test_hal_intrin_int8();

// 16-bit lanes
void test_hal_intrin_uint16();
void test_hal_intrin_int16();

// 32-bit lanes
void test_hal_intrin_uint32();
void test_hal_intrin_int32();

// 64-bit lanes
void test_hal_intrin_uint64();
void test_hal_intrin_int64();

// floating-point lanes
void test_hal_intrin_float32();
void test_hal_intrin_float64();

void test_hal_intrin_float16();
22
23
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
24
25
// Forward declarations: the initializer<N> specializations refer to Data<R>
// before Data is defined, and Data refers back to initializer<R::nlanes>.
template <class R> struct Data;
template <int N> struct initializer;
27
28
template <> struct initializer<64>
29
{
30
template <typename R> static R init(const Data<R> & d)
31
{
32
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
33
d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31],
34
d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47],
35
d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[50], d[51], d[52], d[53],
36
d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]);
37
}
38
};
39
40
template <> struct initializer<32>
41
{
42
template <typename R> static R init(const Data<R> & d)
43
{
44
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
45
d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]);
46
}
47
};
48
49
template <> struct initializer<16>
50
{
51
template <typename R> static R init(const Data<R> & d)
52
{
53
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
54
}
55
};
56
57
template <> struct initializer<8>
58
{
59
template <typename R> static R init(const Data<R> & d)
60
{
61
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]);
62
}
63
};
64
65
template <> struct initializer<4>
66
{
67
template <typename R> static R init(const Data<R> & d)
68
{
69
return R(d[0], d[1], d[2], d[3]);
70
}
71
};
72
73
template <> struct initializer<2>
74
{
75
template <typename R> static R init(const Data<R> & d)
76
{
77
return R(d[0], d[1]);
78
}
79
};
80
81
//==================================================================================================
82
83
template <typename R> struct Data
84
{
85
typedef typename R::lane_type LaneType;
86
typedef typename V_TypeTraits<LaneType>::int_type int_type;
87
88
Data()
89
{
90
for (int i = 0; i < R::nlanes; ++i)
91
d[i] = (LaneType)(i + 1);
92
}
93
Data(LaneType val)
94
{
95
fill(val);
96
}
97
Data(const R & r)
98
{
99
*this = r;
100
}
101
operator R ()
102
{
103
return initializer<R::nlanes>().init(*this);
104
}
105
Data<R> & operator=(const R & r)
106
{
107
v_store(d, r);
108
return *this;
109
}
110
template <typename T> Data<R> & operator*=(T m)
111
{
112
for (int i = 0; i < R::nlanes; ++i)
113
d[i] *= (LaneType)m;
114
return *this;
115
}
116
template <typename T> Data<R> & operator+=(T m)
117
{
118
for (int i = 0; i < R::nlanes; ++i)
119
d[i] += (LaneType)m;
120
return *this;
121
}
122
void fill(LaneType val)
123
{
124
for (int i = 0; i < R::nlanes; ++i)
125
d[i] = val;
126
}
127
void reverse()
128
{
129
for (int i = 0; i < R::nlanes / 2; ++i)
130
std::swap(d[i], d[R::nlanes - i - 1]);
131
}
132
const LaneType & operator[](int i) const
133
{
134
CV_Assert(i >= 0 && i < R::nlanes);
135
return d[i];
136
}
137
LaneType & operator[](int i)
138
{
139
CV_Assert(i >= 0 && i < R::nlanes);
140
return d[i];
141
}
142
int_type as_int(int i) const
143
{
144
CV_Assert(i >= 0 && i < R::nlanes);
145
union
146
{
147
LaneType l;
148
int_type i;
149
} v;
150
v.l = d[i];
151
return v.i;
152
}
153
const LaneType * mid() const
154
{
155
return d + R::nlanes / 2;
156
}
157
LaneType * mid()
158
{
159
return d + R::nlanes / 2;
160
}
161
LaneType sum(int s, int c)
162
{
163
LaneType res = 0;
164
for (int i = s; i < s + c; ++i)
165
res += d[i];
166
return res;
167
}
168
LaneType sum()
169
{
170
return sum(0, R::nlanes);
171
}
172
bool operator==(const Data<R> & other) const
173
{
174
for (int i = 0; i < R::nlanes; ++i)
175
if (d[i] != other.d[i])
176
return false;
177
return true;
178
}
179
void clear()
180
{
181
fill(0);
182
}
183
bool isZero() const
184
{
185
return isValue(0);
186
}
187
bool isValue(uchar val) const
188
{
189
for (int i = 0; i < R::nlanes; ++i)
190
if (d[i] != val)
191
return false;
192
return true;
193
}
194
LaneType d[R::nlanes];
195
};
196
197
template<typename R> struct AlignedData
198
{
199
Data<R> CV_DECL_ALIGNED(CV_SIMD_WIDTH) a; // aligned
200
char dummy;
201
Data<R> u; // unaligned
202
};
203
204
template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R> & d)
205
{
206
out << "{ ";
207
for (int i = 0; i < R::nlanes; ++i)
208
{
209
// out << std::hex << +V_TypeTraits<typename R::lane_type>::reinterpret_int(d.d[i]);
210
out << +d.d[i];
211
if (i + 1 < R::nlanes)
212
out << ", ";
213
}
214
out << " }";
215
return out;
216
}
217
218
template<typename T> static inline void EXPECT_COMPARE_EQ_(const T a, const T b);
219
template<> inline void EXPECT_COMPARE_EQ_<float>(const float a, const float b)
220
{
221
EXPECT_FLOAT_EQ( a, b );
222
}
223
224
template<> inline void EXPECT_COMPARE_EQ_<double>(const double a, const double b)
225
{
226
EXPECT_DOUBLE_EQ( a, b );
227
}
228
229
// pack functions do not do saturation when converting from 64-bit types
230
template<typename T, typename W>
231
inline T pack_saturate_cast(W a) { return saturate_cast<T>(a); }
232
template<>
233
inline int pack_saturate_cast<int, int64>(int64 a) { return static_cast<int>(a); }
234
template<>
235
inline unsigned pack_saturate_cast<unsigned, uint64>(uint64 a) { return static_cast<unsigned>(a); }
236
237
template<typename R> struct TheTest
238
{
239
typedef typename R::lane_type LaneType;
240
241
template <typename T1, typename T2>
242
static inline void EXPECT_COMPARE_EQ(const T1 a, const T2 b)
243
{
244
EXPECT_COMPARE_EQ_<LaneType>((LaneType)a, (LaneType)b);
245
}
246
247
TheTest & test_loadstore()
248
{
249
AlignedData<R> data;
250
AlignedData<R> out;
251
252
// check if addresses are aligned and unaligned respectively
253
EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
254
EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
255
EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
256
EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);
257
258
// check some initialization methods
259
R r1 = data.a;
260
R r2 = vx_load(data.u.d);
261
R r3 = vx_load_aligned(data.a.d);
262
R r4(r2);
263
EXPECT_EQ(data.a[0], r1.get0());
264
EXPECT_EQ(data.u[0], r2.get0());
265
EXPECT_EQ(data.a[0], r3.get0());
266
EXPECT_EQ(data.u[0], r4.get0());
267
268
R r_low = vx_load_low((LaneType*)data.u.d);
269
EXPECT_EQ(data.u[0], r_low.get0());
270
v_store(out.u.d, r_low);
271
for (int i = 0; i < R::nlanes/2; ++i)
272
{
273
SCOPED_TRACE(cv::format("i=%d", i));
274
EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
275
}
276
277
R r_low_align8byte = vx_load_low((LaneType*)((char*)data.u.d + (CV_SIMD_WIDTH / 2)));
278
EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0());
279
v_store(out.u.d, r_low_align8byte);
280
for (int i = 0; i < R::nlanes/2; ++i)
281
{
282
SCOPED_TRACE(cv::format("i=%d", i));
283
EXPECT_EQ((LaneType)data.u[i + R::nlanes/2], (LaneType)out.u[i]);
284
}
285
286
// check some store methods
287
out.u.clear();
288
out.a.clear();
289
v_store(out.u.d, r1);
290
v_store_aligned(out.a.d, r2);
291
EXPECT_EQ(data.a, out.a);
292
EXPECT_EQ(data.u, out.u);
293
294
// check more store methods
295
Data<R> d, res(0);
296
R r5 = d;
297
v_store_high(res.mid(), r5);
298
v_store_low(res.d, r5);
299
EXPECT_EQ(d, res);
300
301
// check halves load correctness
302
res.clear();
303
R r6 = vx_load_halves(d.d, d.mid());
304
v_store(res.d, r6);
305
EXPECT_EQ(d, res);
306
307
// zero, all
308
Data<R> resZ, resV;
309
resZ.fill((LaneType)0);
310
resV.fill((LaneType)8);
311
for (int i = 0; i < R::nlanes; ++i)
312
{
313
SCOPED_TRACE(cv::format("i=%d", i));
314
EXPECT_EQ((LaneType)0, resZ[i]);
315
EXPECT_EQ((LaneType)8, resV[i]);
316
}
317
318
// reinterpret_as
319
v_uint8 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
320
v_int8 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
321
v_uint16 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
322
v_int16 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
323
v_uint32 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
324
v_int32 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
325
v_uint64 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
326
v_int64 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
327
v_float32 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
328
#if CV_SIMD_64F
329
v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
330
#endif
331
332
return *this;
333
}
334
335
TheTest & test_interleave()
336
{
337
Data<R> data1, data2, data3, data4;
338
data2 += 20;
339
data3 += 40;
340
data4 += 60;
341
342
343
R a = data1, b = data2, c = data3;
344
R d = data1, e = data2, f = data3, g = data4;
345
346
LaneType buf3[R::nlanes * 3];
347
LaneType buf4[R::nlanes * 4];
348
349
v_store_interleave(buf3, a, b, c);
350
v_store_interleave(buf4, d, e, f, g);
351
352
Data<R> z(0);
353
a = b = c = d = e = f = g = z;
354
355
v_load_deinterleave(buf3, a, b, c);
356
v_load_deinterleave(buf4, d, e, f, g);
357
358
for (int i = 0; i < R::nlanes; ++i)
359
{
360
SCOPED_TRACE(cv::format("i=%d", i));
361
EXPECT_EQ(data1, Data<R>(a));
362
EXPECT_EQ(data2, Data<R>(b));
363
EXPECT_EQ(data3, Data<R>(c));
364
365
EXPECT_EQ(data1, Data<R>(d));
366
EXPECT_EQ(data2, Data<R>(e));
367
EXPECT_EQ(data3, Data<R>(f));
368
EXPECT_EQ(data4, Data<R>(g));
369
}
370
371
return *this;
372
}
373
374
// float32x4 only
375
TheTest & test_interleave_2channel()
376
{
377
Data<R> data1, data2;
378
data2 += 20;
379
380
R a = data1, b = data2;
381
382
LaneType buf2[R::nlanes * 2];
383
384
v_store_interleave(buf2, a, b);
385
386
Data<R> z(0);
387
a = b = z;
388
389
v_load_deinterleave(buf2, a, b);
390
391
for (int i = 0; i < R::nlanes; ++i)
392
{
393
SCOPED_TRACE(cv::format("i=%d", i));
394
EXPECT_EQ(data1, Data<R>(a));
395
EXPECT_EQ(data2, Data<R>(b));
396
}
397
398
return *this;
399
}
400
401
// v_expand and v_load_expand
402
TheTest & test_expand()
403
{
404
typedef typename V_RegTraits<R>::w_reg Rx2;
405
Data<R> dataA;
406
R a = dataA;
407
408
Data<Rx2> resB = vx_load_expand(dataA.d);
409
410
Rx2 c, d, e, f;
411
v_expand(a, c, d);
412
413
e = v_expand_low(a);
414
f = v_expand_high(a);
415
416
Data<Rx2> resC = c, resD = d, resE = e, resF = f;
417
const int n = Rx2::nlanes;
418
for (int i = 0; i < n; ++i)
419
{
420
SCOPED_TRACE(cv::format("i=%d", i));
421
EXPECT_EQ(dataA[i], resB[i]);
422
EXPECT_EQ(dataA[i], resC[i]);
423
EXPECT_EQ(dataA[i + n], resD[i]);
424
EXPECT_EQ(dataA[i], resE[i]);
425
EXPECT_EQ(dataA[i + n], resF[i]);
426
}
427
428
return *this;
429
}
430
431
TheTest & test_expand_q()
432
{
433
typedef typename V_RegTraits<R>::q_reg Rx4;
434
Data<R> data;
435
Data<Rx4> out = vx_load_expand_q(data.d);
436
const int n = Rx4::nlanes;
437
for (int i = 0; i < n; ++i)
438
{
439
SCOPED_TRACE(cv::format("i=%d", i));
440
EXPECT_EQ(data[i], out[i]);
441
}
442
443
return *this;
444
}
445
446
TheTest & test_addsub()
447
{
448
Data<R> dataA, dataB;
449
dataB.reverse();
450
R a = dataA, b = dataB;
451
452
Data<R> resC = a + b, resD = a - b;
453
for (int i = 0; i < R::nlanes; ++i)
454
{
455
SCOPED_TRACE(cv::format("i=%d", i));
456
EXPECT_EQ(saturate_cast<LaneType>(dataA[i] + dataB[i]), resC[i]);
457
EXPECT_EQ(saturate_cast<LaneType>(dataA[i] - dataB[i]), resD[i]);
458
}
459
460
return *this;
461
}
462
463
TheTest & test_arithm_wrap()
464
{
465
Data<R> dataA, dataB;
466
dataB.reverse();
467
R a = dataA, b = dataB;
468
469
Data<R> resC = v_add_wrap(a, b),
470
resD = v_sub_wrap(a, b),
471
resE = v_mul_wrap(a, b);
472
for (int i = 0; i < R::nlanes; ++i)
473
{
474
SCOPED_TRACE(cv::format("i=%d", i));
475
EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]);
476
EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]);
477
EXPECT_EQ((LaneType)(dataA[i] * dataB[i]), resE[i]);
478
}
479
return *this;
480
}
481
482
TheTest & test_mul()
483
{
484
Data<R> dataA, dataB;
485
dataA[1] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
486
dataB.reverse();
487
R a = dataA, b = dataB;
488
489
Data<R> resC = a * b;
490
for (int i = 0; i < R::nlanes; ++i)
491
{
492
SCOPED_TRACE(cv::format("i=%d", i));
493
EXPECT_EQ(saturate_cast<LaneType>(dataA[i] * dataB[i]), resC[i]);
494
}
495
496
return *this;
497
}
498
499
TheTest & test_div()
500
{
501
Data<R> dataA, dataB;
502
dataB.reverse();
503
R a = dataA, b = dataB;
504
505
Data<R> resC = a / b;
506
for (int i = 0; i < R::nlanes; ++i)
507
{
508
SCOPED_TRACE(cv::format("i=%d", i));
509
EXPECT_EQ(dataA[i] / dataB[i], resC[i]);
510
}
511
512
return *this;
513
}
514
515
TheTest & test_mul_expand()
516
{
517
typedef typename V_RegTraits<R>::w_reg Rx2;
518
Data<R> dataA, dataB(2);
519
R a = dataA, b = dataB;
520
Rx2 c, d;
521
522
v_mul_expand(a, b, c, d);
523
524
Data<Rx2> resC = c, resD = d;
525
const int n = R::nlanes / 2;
526
for (int i = 0; i < n; ++i)
527
{
528
SCOPED_TRACE(cv::format("i=%d", i));
529
EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]);
530
EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]);
531
}
532
533
return *this;
534
}
535
536
TheTest & test_abs()
537
{
538
typedef typename V_RegTraits<R>::u_reg Ru;
539
typedef typename Ru::lane_type u_type;
540
Data<R> dataA, dataB(10);
541
R a = dataA, b = dataB;
542
a = a - b;
543
544
Data<Ru> resC = v_abs(a);
545
546
for (int i = 0; i < Ru::nlanes; ++i)
547
{
548
SCOPED_TRACE(cv::format("i=%d", i));
549
EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]);
550
}
551
552
return *this;
553
}
554
555
template <int s>
556
TheTest & test_shift()
557
{
558
SCOPED_TRACE(s);
559
Data<R> dataA;
560
dataA[0] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
561
R a = dataA;
562
563
Data<R> resB = a << s, resC = v_shl<s>(a), resD = a >> s, resE = v_shr<s>(a);
564
565
for (int i = 0; i < R::nlanes; ++i)
566
{
567
SCOPED_TRACE(cv::format("i=%d", i));
568
EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resB[i]);
569
EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resC[i]);
570
EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resD[i]);
571
EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resE[i]);
572
}
573
return *this;
574
}
575
576
TheTest & test_cmp()
577
{
578
Data<R> dataA, dataB;
579
dataB.reverse();
580
dataB += 1;
581
R a = dataA, b = dataB;
582
583
Data<R> resC = (a == b);
584
Data<R> resD = (a != b);
585
Data<R> resE = (a > b);
586
Data<R> resF = (a >= b);
587
Data<R> resG = (a < b);
588
Data<R> resH = (a <= b);
589
590
for (int i = 0; i < R::nlanes; ++i)
591
{
592
SCOPED_TRACE(cv::format("i=%d", i));
593
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
594
EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
595
EXPECT_EQ(dataA[i] > dataB[i], resE[i] != 0);
596
EXPECT_EQ(dataA[i] >= dataB[i], resF[i] != 0);
597
EXPECT_EQ(dataA[i] < dataB[i], resG[i] != 0);
598
EXPECT_EQ(dataA[i] <= dataB[i], resH[i] != 0);
599
}
600
return *this;
601
}
602
603
TheTest & test_dot_prod()
604
{
605
typedef typename V_RegTraits<R>::w_reg Rx2;
606
typedef typename Rx2::lane_type w_type;
607
608
Data<R> dataA, dataB(2);
609
R a = dataA, b = dataB;
610
611
Data<Rx2> dataC;
612
dataC += std::numeric_limits<w_type>::is_signed ?
613
std::numeric_limits<w_type>::min() :
614
std::numeric_limits<w_type>::max() - R::nlanes * (dataB[0] + 1);
615
Rx2 c = dataC;
616
617
Data<Rx2> resD = v_dotprod(a, b),
618
resE = v_dotprod(a, b, c);
619
620
const int n = R::nlanes / 2;
621
for (int i = 0; i < n; ++i)
622
{
623
SCOPED_TRACE(cv::format("i=%d", i));
624
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]);
625
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]);
626
}
627
return *this;
628
}
629
630
TheTest & test_logic()
631
{
632
Data<R> dataA, dataB(2);
633
R a = dataA, b = dataB;
634
635
Data<R> resC = a & b, resD = a | b, resE = a ^ b, resF = ~a;
636
for (int i = 0; i < R::nlanes; ++i)
637
{
638
SCOPED_TRACE(cv::format("i=%d", i));
639
EXPECT_EQ(dataA[i] & dataB[i], resC[i]);
640
EXPECT_EQ(dataA[i] | dataB[i], resD[i]);
641
EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]);
642
EXPECT_EQ((LaneType)~dataA[i], resF[i]);
643
}
644
645
return *this;
646
}
647
648
TheTest & test_sqrt_abs()
649
{
650
Data<R> dataA, dataD;
651
dataD *= -1.0;
652
R a = dataA, d = dataD;
653
654
Data<R> resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d);
655
for (int i = 0; i < R::nlanes; ++i)
656
{
657
SCOPED_TRACE(cv::format("i=%d", i));
658
EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]);
659
EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]);
660
EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]);
661
}
662
663
return *this;
664
}
665
666
TheTest & test_min_max()
667
{
668
Data<R> dataA, dataB;
669
dataB.reverse();
670
R a = dataA, b = dataB;
671
672
Data<R> resC = v_min(a, b), resD = v_max(a, b);
673
for (int i = 0; i < R::nlanes; ++i)
674
{
675
SCOPED_TRACE(cv::format("i=%d", i));
676
EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]);
677
EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]);
678
}
679
680
return *this;
681
}
682
683
TheTest & test_popcount()
684
{
685
static unsigned popcountTable[] = {
686
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33,
687
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81,
688
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123,
689
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172,
690
176, 181, 186, 192, 193
691
};
692
Data<R> dataA;
693
R a = dataA;
694
695
unsigned resB = (unsigned)v_reduce_sum(v_popcount(a));
696
EXPECT_EQ(popcountTable[R::nlanes], resB);
697
698
return *this;
699
}
700
701
TheTest & test_absdiff()
702
{
703
typedef typename V_RegTraits<R>::u_reg Ru;
704
typedef typename Ru::lane_type u_type;
705
Data<R> dataA(std::numeric_limits<LaneType>::max()),
706
dataB(std::numeric_limits<LaneType>::min());
707
dataA[0] = (LaneType)-1;
708
dataB[0] = 1;
709
dataA[1] = 2;
710
dataB[1] = (LaneType)-2;
711
R a = dataA, b = dataB;
712
Data<Ru> resC = v_absdiff(a, b);
713
const u_type mask = std::numeric_limits<LaneType>::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0;
714
for (int i = 0; i < Ru::nlanes; ++i)
715
{
716
SCOPED_TRACE(cv::format("i=%d", i));
717
u_type uA = dataA[i] ^ mask;
718
u_type uB = dataB[i] ^ mask;
719
EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]);
720
}
721
return *this;
722
}
723
724
TheTest & test_float_absdiff()
725
{
726
Data<R> dataA(std::numeric_limits<LaneType>::max()),
727
dataB(std::numeric_limits<LaneType>::min());
728
dataA[0] = -1;
729
dataB[0] = 1;
730
dataA[1] = 2;
731
dataB[1] = -2;
732
R a = dataA, b = dataB;
733
Data<R> resC = v_absdiff(a, b);
734
for (int i = 0; i < R::nlanes; ++i)
735
{
736
SCOPED_TRACE(cv::format("i=%d", i));
737
EXPECT_EQ(dataA[i] > dataB[i] ? dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]);
738
}
739
return *this;
740
}
741
742
TheTest & test_reduce()
743
{
744
Data<R> dataA;
745
R a = dataA;
746
EXPECT_EQ((LaneType)1, v_reduce_min(a));
747
EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a));
748
EXPECT_EQ((LaneType)((1 + R::nlanes)*R::nlanes/2), v_reduce_sum(a));
749
return *this;
750
}
751
752
TheTest & test_mask()
753
{
754
typedef typename V_RegTraits<R>::int_reg int_reg;
755
typedef typename V_RegTraits<int_reg>::u_reg uint_reg;
756
typedef typename int_reg::lane_type int_type;
757
typedef typename uint_reg::lane_type uint_type;
758
759
Data<R> dataA, dataB(0), dataC, dataD(1), dataE(2);
760
dataA[1] *= (LaneType)-1;
761
union
762
{
763
LaneType l;
764
uint_type ui;
765
}
766
all1s;
767
all1s.ui = (uint_type)-1;
768
LaneType mask_one = all1s.l;
769
dataB[1] = mask_one;
770
dataB[R::nlanes / 2] = mask_one;
771
dataB[R::nlanes - 1] = mask_one;
772
dataC *= (LaneType)-1;
773
R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE;
774
775
int m = v_signmask(a);
776
EXPECT_EQ(2, m);
777
778
EXPECT_EQ(false, v_check_all(a));
779
EXPECT_EQ(false, v_check_all(b));
780
EXPECT_EQ(true, v_check_all(c));
781
782
EXPECT_EQ(true, v_check_any(a));
783
EXPECT_EQ(true, v_check_any(b));
784
EXPECT_EQ(true, v_check_any(c));
785
786
R f = v_select(b, d, e);
787
Data<R> resF = f;
788
for (int i = 0; i < R::nlanes; ++i)
789
{
790
SCOPED_TRACE(cv::format("i=%d", i));
791
int_type m2 = dataB.as_int(i);
792
EXPECT_EQ((dataD.as_int(i) & m2) | (dataE.as_int(i) & ~m2), resF.as_int(i));
793
}
794
795
return *this;
796
}
797
798
template <int s>
799
TheTest & test_pack()
800
{
801
SCOPED_TRACE(s);
802
typedef typename V_RegTraits<R>::w_reg Rx2;
803
typedef typename Rx2::lane_type w_type;
804
Data<Rx2> dataA, dataB;
805
dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10;
806
dataB *= 10;
807
dataB[0] = static_cast<w_type>(std::numeric_limits<LaneType>::max()) + 17; // to check saturation
808
Rx2 a = dataA, b = dataB;
809
810
Data<R> resC = v_pack(a, b);
811
Data<R> resD = v_rshr_pack<s>(a, b);
812
813
Data<R> resE(0);
814
v_pack_store(resE.d, b);
815
816
Data<R> resF(0);
817
v_rshr_pack_store<s>(resF.d, b);
818
819
const int n = Rx2::nlanes;
820
const w_type add = (w_type)1 << (s - 1);
821
for (int i = 0; i < n; ++i)
822
{
823
SCOPED_TRACE(cv::format("i=%d", i));
824
EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
825
EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
826
EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
827
EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]);
828
EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resE[i]);
829
EXPECT_EQ((LaneType)0, resE[i + n]);
830
EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]);
831
EXPECT_EQ((LaneType)0, resF[i + n]);
832
}
833
return *this;
834
}
835
836
template <int s>
837
TheTest & test_pack_u()
838
{
839
SCOPED_TRACE(s);
840
//typedef typename V_RegTraits<LaneType>::w_type LaneType_w;
841
typedef typename V_RegTraits<R>::w_reg R2;
842
typedef typename V_RegTraits<R2>::int_reg Ri2;
843
typedef typename Ri2::lane_type w_type;
844
845
Data<Ri2> dataA, dataB;
846
dataA += -10;
847
dataB *= 10;
848
dataB[0] = static_cast<w_type>(std::numeric_limits<LaneType>::max()) + 17; // to check saturation
849
Ri2 a = dataA, b = dataB;
850
851
Data<R> resC = v_pack_u(a, b);
852
Data<R> resD = v_rshr_pack_u<s>(a, b);
853
854
Data<R> resE(0);
855
v_pack_u_store(resE.d, b);
856
857
Data<R> resF(0);
858
v_rshr_pack_u_store<s>(resF.d, b);
859
860
const int n = Ri2::nlanes;
861
const w_type add = (w_type)1 << (s - 1);
862
for (int i = 0; i < n; ++i)
863
{
864
SCOPED_TRACE(cv::format("i=%d", i));
865
EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
866
EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
867
EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
868
EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]);
869
EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resE[i]);
870
EXPECT_EQ((LaneType)0, resE[i + n]);
871
EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]);
872
EXPECT_EQ((LaneType)0, resF[i + n]);
873
}
874
return *this;
875
}
876
877
TheTest & test_unpack()
878
{
879
Data<R> dataA, dataB;
880
dataB *= 10;
881
R a = dataA, b = dataB;
882
883
R c, d, e, f, lo, hi;
884
v_zip(a, b, c, d);
885
v_recombine(a, b, e, f);
886
lo = v_combine_low(a, b);
887
hi = v_combine_high(a, b);
888
889
Data<R> resC = c, resD = d, resE = e, resF = f, resLo = lo, resHi = hi;
890
891
const int n = R::nlanes/2;
892
for (int i = 0; i < n; ++i)
893
{
894
SCOPED_TRACE(cv::format("i=%d", i));
895
EXPECT_EQ(dataA[i], resC[i*2]);
896
EXPECT_EQ(dataB[i], resC[i*2+1]);
897
EXPECT_EQ(dataA[i+n], resD[i*2]);
898
EXPECT_EQ(dataB[i+n], resD[i*2+1]);
899
900
EXPECT_EQ(dataA[i], resE[i]);
901
EXPECT_EQ(dataB[i], resE[i+n]);
902
EXPECT_EQ(dataA[i+n], resF[i]);
903
EXPECT_EQ(dataB[i+n], resF[i+n]);
904
905
EXPECT_EQ(dataA[i], resLo[i]);
906
EXPECT_EQ(dataB[i], resLo[i+n]);
907
EXPECT_EQ(dataA[i+n], resHi[i]);
908
EXPECT_EQ(dataB[i+n], resHi[i+n]);
909
}
910
911
return *this;
912
}
913
914
template<int s>
915
TheTest & test_extract()
916
{
917
SCOPED_TRACE(s);
918
Data<R> dataA, dataB;
919
dataB *= 10;
920
R a = dataA, b = dataB;
921
922
Data<R> resC = v_extract<s>(a, b);
923
924
for (int i = 0; i < R::nlanes; ++i)
925
{
926
SCOPED_TRACE(cv::format("i=%d", i));
927
if (i + s >= R::nlanes)
928
EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]);
929
else
930
EXPECT_EQ(dataA[i + s], resC[i]);
931
}
932
933
return *this;
934
}
935
936
template<int s>
937
TheTest & test_rotate()
938
{
939
SCOPED_TRACE(s);
940
Data<R> dataA, dataB;
941
dataB *= 10;
942
R a = dataA, b = dataB;
943
944
Data<R> resC = v_rotate_right<s>(a);
945
Data<R> resD = v_rotate_right<s>(a, b);
946
947
Data<R> resE = v_rotate_left<s>(a);
948
Data<R> resF = v_rotate_left<s>(a, b);
949
950
for (int i = 0; i < R::nlanes; ++i)
951
{
952
SCOPED_TRACE(cv::format("i=%d", i));
953
if (i + s >= R::nlanes)
954
{
955
EXPECT_EQ((LaneType)0, resC[i]);
956
EXPECT_EQ(dataB[i - R::nlanes + s], resD[i]);
957
958
EXPECT_EQ((LaneType)0, resE[i - R::nlanes + s]);
959
EXPECT_EQ(dataB[i], resF[i - R::nlanes + s]);
960
}
961
else
962
{
963
EXPECT_EQ(dataA[i + s], resC[i]);
964
EXPECT_EQ(dataA[i + s], resD[i]);
965
966
EXPECT_EQ(dataA[i], resE[i + s]);
967
EXPECT_EQ(dataA[i], resF[i + s]);
968
}
969
}
970
return *this;
971
}
972
973
TheTest & test_float_math()
974
{
975
typedef typename V_RegTraits<R>::round_reg Ri;
976
Data<R> data1, data2, data3;
977
data1 *= 1.1;
978
data2 += 10;
979
R a1 = data1, a2 = data2, a3 = data3;
980
981
Data<Ri> resB = v_round(a1),
982
resC = v_trunc(a1),
983
resD = v_floor(a1),
984
resE = v_ceil(a1);
985
986
Data<R> resF = v_magnitude(a1, a2),
987
resG = v_sqr_magnitude(a1, a2),
988
resH = v_muladd(a1, a2, a3);
989
990
for (int i = 0; i < R::nlanes; ++i)
991
{
992
SCOPED_TRACE(cv::format("i=%d", i));
993
EXPECT_EQ(cvRound(data1[i]), resB[i]);
994
EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]);
995
EXPECT_EQ(cvFloor(data1[i]), resD[i]);
996
EXPECT_EQ(cvCeil(data1[i]), resE[i]);
997
998
EXPECT_COMPARE_EQ(std::sqrt(data1[i]*data1[i] + data2[i]*data2[i]), resF[i]);
999
EXPECT_COMPARE_EQ(data1[i]*data1[i] + data2[i]*data2[i], resG[i]);
1000
EXPECT_COMPARE_EQ(data1[i]*data2[i] + data3[i], resH[i]);
1001
}
1002
1003
return *this;
1004
}
1005
1006
TheTest & test_float_cvt32()
1007
{
1008
typedef v_float32 Rt;
1009
Data<R> dataA;
1010
dataA *= 1.1;
1011
R a = dataA;
1012
Rt b = v_cvt_f32(a);
1013
Data<Rt> resB = b;
1014
int n = std::min<int>(Rt::nlanes, R::nlanes);
1015
for (int i = 0; i < n; ++i)
1016
{
1017
SCOPED_TRACE(cv::format("i=%d", i));
1018
EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
1019
}
1020
return *this;
1021
}
1022
1023
TheTest & test_float_cvt64()
1024
{
1025
#if CV_SIMD_64F
1026
typedef v_float64 Rt;
1027
Data<R> dataA;
1028
dataA *= 1.1;
1029
R a = dataA;
1030
Rt b = v_cvt_f64(a);
1031
Rt c = v_cvt_f64_high(a);
1032
Data<Rt> resB = b;
1033
Data<Rt> resC = c;
1034
int n = std::min<int>(Rt::nlanes, R::nlanes);
1035
for (int i = 0; i < n; ++i)
1036
{
1037
SCOPED_TRACE(cv::format("i=%d", i));
1038
EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
1039
}
1040
for (int i = 0; i < n; ++i)
1041
{
1042
SCOPED_TRACE(cv::format("i=%d", i));
1043
EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]);
1044
}
1045
#endif
1046
return *this;
1047
}
1048
1049
TheTest & test_matmul()
1050
{
1051
Data<R> dataV, dataA, dataB, dataC, dataD;
1052
dataB.reverse();
1053
dataC += 2;
1054
dataD *= 0.3;
1055
R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD;
1056
1057
Data<R> res = v_matmul(v, a, b, c, d);
1058
for (int i = 0; i < R::nlanes; i += 4)
1059
{
1060
for (int j = i; j < i + 4; ++j)
1061
{
1062
SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
1063
LaneType val = dataV[i] * dataA[j]
1064
+ dataV[i + 1] * dataB[j]
1065
+ dataV[i + 2] * dataC[j]
1066
+ dataV[i + 3] * dataD[j];
1067
EXPECT_COMPARE_EQ(val, res[j]);
1068
}
1069
}
1070
1071
Data<R> resAdd = v_matmuladd(v, a, b, c, d);
1072
for (int i = 0; i < R::nlanes; i += 4)
1073
{
1074
for (int j = i; j < i + 4; ++j)
1075
{
1076
SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
1077
LaneType val = dataV[i] * dataA[j]
1078
+ dataV[i + 1] * dataB[j]
1079
+ dataV[i + 2] * dataC[j]
1080
+ dataD[j];
1081
EXPECT_COMPARE_EQ(val, resAdd[j]);
1082
}
1083
}
1084
return *this;
1085
}
1086
1087
TheTest & test_transpose()
1088
{
1089
Data<R> dataA, dataB, dataC, dataD;
1090
dataB *= 5;
1091
dataC *= 10;
1092
dataD *= 15;
1093
R a = dataA, b = dataB, c = dataC, d = dataD;
1094
R e, f, g, h;
1095
v_transpose4x4(a, b, c, d,
1096
e, f, g, h);
1097
1098
Data<R> res[4] = {e, f, g, h};
1099
for (int i = 0; i < R::nlanes; i += 4)
1100
{
1101
for (int j = 0; j < 4; ++j)
1102
{
1103
SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
1104
EXPECT_EQ(dataA[i + j], res[j][i]);
1105
EXPECT_EQ(dataB[i + j], res[j][i + 1]);
1106
EXPECT_EQ(dataC[i + j], res[j][i + 2]);
1107
EXPECT_EQ(dataD[i + j], res[j][i + 3]);
1108
}
1109
}
1110
return *this;
1111
}
1112
1113
TheTest & test_reduce_sum4()
1114
{
1115
Data<R> dataA, dataB, dataC, dataD;
1116
dataB *= 0.01f;
1117
dataC *= 0.001f;
1118
dataD *= 0.002f;
1119
1120
R a = dataA, b = dataB, c = dataC, d = dataD;
1121
Data<R> res = v_reduce_sum4(a, b, c, d);
1122
1123
for (int i = 0; i < R::nlanes; i += 4)
1124
{
1125
SCOPED_TRACE(cv::format("i=%d", i));
1126
EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]);
1127
EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]);
1128
EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]);
1129
EXPECT_COMPARE_EQ(dataD.sum(i, 4), res[i + 3]);
1130
}
1131
return *this;
1132
}
1133
1134
TheTest & test_loadstore_fp16_f32()
1135
{
1136
printf("test_loadstore_fp16_f32 ...\n");
1137
AlignedData<v_uint16> data; data.a.clear();
1138
data.a.d[0] = 0x3c00; // 1.0
1139
data.a.d[R::nlanes - 1] = (unsigned short)0xc000; // -2.0
1140
AlignedData<v_float32> data_f32; data_f32.a.clear();
1141
AlignedData<v_uint16> out;
1142
1143
R r1 = vx_load_expand((const cv::float16_t*)data.a.d);
1144
R r2(r1);
1145
EXPECT_EQ(1.0f, r1.get0());
1146
vx_store(data_f32.a.d, r2);
1147
EXPECT_EQ(-2.0f, data_f32.a.d[R::nlanes - 1]);
1148
1149
out.a.clear();
1150
v_pack_store((cv::float16_t*)out.a.d, r2);
1151
for (int i = 0; i < R::nlanes; ++i)
1152
{
1153
EXPECT_EQ(data.a[i], out.a[i]) << "i=" << i;
1154
}
1155
1156
return *this;
1157
}
1158
1159
#if 0
1160
// Load/store checks for a native fp16 register type R.
// NOTE: this method sits inside an "#if 0" region and is currently not compiled.
// Returns *this so calls can be chained.
TheTest & test_loadstore_fp16()
{
    printf("test_loadstore_fp16 ...\n");

    AlignedData<R> data;
    AlignedData<R> out;

    // check if addresses are aligned and unaligned respectively
    EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
    EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
    EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
    EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);

    // check some initialization methods
    R r1 = data.u;
    R r2 = vx_load_expand((const float16_t*)data.a.d);
    R r3(r2);
    EXPECT_EQ(data.u[0], r1.get0());
    EXPECT_EQ(data.a[0], r2.get0());
    EXPECT_EQ(data.a[0], r3.get0());

    // check some store methods
    out.a.clear();
    v_store(out.a.d, r1);
    EXPECT_EQ(data.a, out.a);

    return *this;
}
1187
// Converts a float32 register to fp16 and back and compares lane 0.
// NOTE: this method sits inside an "#if 0" region and is currently not compiled.
// Returns *this so calls can be chained.
TheTest & test_float_cvt_fp16()
{
    printf("test_float_cvt_fp16 ...\n");

    AlignedData<v_float32> data;

    // check conversion
    v_float32 r1 = vx_load(data.a.d);
    v_float16 r2 = v_cvt_f16(r1, vx_setzero_f32());
    v_float32 r3 = v_cvt_f32(r2);

    // 0x3c00 is the half-precision bit pattern annotated as 1.0 elsewhere in this file.
    EXPECT_EQ(0x3c00, r2.get0());
    EXPECT_EQ(r3.get0(), r1.get0());

    return *this;
}
1201
#endif
1202
};
1203
1204
1205
// DUMP_ENTRY(type): prints a one-line banner with the SIMD register width in
// bits (8 * sizeof(v_uint8)) followed by CV__TRACE_FUNCTION (presumably the
// name of the enclosing function). The `type` argument is currently unused.
//
// The expansion is a single expression without a trailing ';', so the usual
// `DUMP_ENTRY(x);` call does not leave a stray empty statement behind.
// Change the condition to 0 to silence the banner; the fallback definition
// keeps every call site compiling.
#if 1
#define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*(int)sizeof(v_uint8), CV__TRACE_FUNCTION)
#else
#define DUMP_ENTRY(type) ((void)0)
#endif
1208
1209
//============= 8-bit integer =====================================================================
1210
1211
void test_hal_intrin_uint8()
1212
{
1213
DUMP_ENTRY(v_uint8);
1214
TheTest<v_uint8>()
1215
.test_loadstore()
1216
.test_interleave()
1217
.test_expand()
1218
.test_expand_q()
1219
.test_addsub()
1220
.test_arithm_wrap()
1221
.test_mul()
1222
.test_mul_expand()
1223
.test_cmp()
1224
.test_logic()
1225
.test_min_max()
1226
.test_absdiff()
1227
.test_mask()
1228
.test_popcount()
1229
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
1230
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
1231
.test_unpack()
1232
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
1233
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
1234
;
1235
1236
#if CV_SIMD_WIDTH == 32
1237
TheTest<v_uint8>()
1238
.test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
1239
.test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
1240
.test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
1241
.test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
1242
;
1243
#endif
1244
}
1245
1246
void test_hal_intrin_int8()
1247
{
1248
DUMP_ENTRY(v_int8);
1249
TheTest<v_int8>()
1250
.test_loadstore()
1251
.test_interleave()
1252
.test_expand()
1253
.test_expand_q()
1254
.test_addsub()
1255
.test_arithm_wrap()
1256
.test_mul()
1257
.test_mul_expand()
1258
.test_cmp()
1259
.test_logic()
1260
.test_min_max()
1261
.test_absdiff()
1262
.test_abs()
1263
.test_mask()
1264
.test_popcount()
1265
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
1266
.test_unpack()
1267
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
1268
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
1269
;
1270
}
1271
1272
//============= 16-bit integer =====================================================================
1273
1274
void test_hal_intrin_uint16()
1275
{
1276
DUMP_ENTRY(v_uint16);
1277
TheTest<v_uint16>()
1278
.test_loadstore()
1279
.test_interleave()
1280
.test_expand()
1281
.test_addsub()
1282
.test_arithm_wrap()
1283
.test_mul()
1284
.test_mul_expand()
1285
.test_cmp()
1286
.test_shift<1>()
1287
.test_shift<8>()
1288
.test_logic()
1289
.test_min_max()
1290
.test_absdiff()
1291
.test_reduce()
1292
.test_mask()
1293
.test_popcount()
1294
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
1295
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
1296
.test_unpack()
1297
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
1298
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
1299
;
1300
}
1301
1302
void test_hal_intrin_int16()
1303
{
1304
DUMP_ENTRY(v_int16);
1305
TheTest<v_int16>()
1306
.test_loadstore()
1307
.test_interleave()
1308
.test_expand()
1309
.test_addsub()
1310
.test_arithm_wrap()
1311
.test_mul()
1312
.test_mul_expand()
1313
.test_cmp()
1314
.test_shift<1>()
1315
.test_shift<8>()
1316
.test_dot_prod()
1317
.test_logic()
1318
.test_min_max()
1319
.test_absdiff()
1320
.test_abs()
1321
.test_reduce()
1322
.test_mask()
1323
.test_popcount()
1324
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
1325
.test_unpack()
1326
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
1327
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
1328
;
1329
}
1330
1331
//============= 32-bit integer =====================================================================
1332
1333
void test_hal_intrin_uint32()
1334
{
1335
DUMP_ENTRY(v_uint32);
1336
TheTest<v_uint32>()
1337
.test_loadstore()
1338
.test_interleave()
1339
.test_expand()
1340
.test_addsub()
1341
.test_mul()
1342
.test_mul_expand()
1343
.test_cmp()
1344
.test_shift<1>()
1345
.test_shift<8>()
1346
.test_logic()
1347
.test_min_max()
1348
.test_absdiff()
1349
.test_reduce()
1350
.test_mask()
1351
.test_popcount()
1352
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
1353
.test_unpack()
1354
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
1355
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
1356
.test_transpose()
1357
;
1358
}
1359
1360
void test_hal_intrin_int32()
1361
{
1362
DUMP_ENTRY(v_int32);
1363
TheTest<v_int32>()
1364
.test_loadstore()
1365
.test_interleave()
1366
.test_expand()
1367
.test_addsub()
1368
.test_mul()
1369
.test_abs()
1370
.test_cmp()
1371
.test_popcount()
1372
.test_shift<1>().test_shift<8>()
1373
.test_logic()
1374
.test_min_max()
1375
.test_absdiff()
1376
.test_reduce()
1377
.test_mask()
1378
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
1379
.test_unpack()
1380
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
1381
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
1382
.test_float_cvt32()
1383
.test_float_cvt64()
1384
.test_transpose()
1385
;
1386
}
1387
1388
//============= 64-bit integer =====================================================================
1389
1390
void test_hal_intrin_uint64()
1391
{
1392
DUMP_ENTRY(v_uint64);
1393
TheTest<v_uint64>()
1394
.test_loadstore()
1395
.test_addsub()
1396
.test_shift<1>().test_shift<8>()
1397
.test_logic()
1398
.test_extract<0>().test_extract<1>()
1399
.test_rotate<0>().test_rotate<1>()
1400
;
1401
}
1402
1403
void test_hal_intrin_int64()
1404
{
1405
DUMP_ENTRY(v_int64);
1406
TheTest<v_int64>()
1407
.test_loadstore()
1408
.test_addsub()
1409
.test_shift<1>().test_shift<8>()
1410
.test_logic()
1411
.test_extract<0>().test_extract<1>()
1412
.test_rotate<0>().test_rotate<1>()
1413
;
1414
}
1415
1416
//============= Floating point =====================================================================
1417
void test_hal_intrin_float32()
1418
{
1419
DUMP_ENTRY(v_float32);
1420
TheTest<v_float32>()
1421
.test_loadstore()
1422
.test_interleave()
1423
.test_interleave_2channel()
1424
.test_addsub()
1425
.test_mul()
1426
.test_div()
1427
.test_cmp()
1428
.test_sqrt_abs()
1429
.test_min_max()
1430
.test_float_absdiff()
1431
.test_reduce()
1432
.test_mask()
1433
.test_unpack()
1434
.test_float_math()
1435
.test_float_cvt64()
1436
.test_matmul()
1437
.test_transpose()
1438
.test_reduce_sum4()
1439
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
1440
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
1441
;
1442
1443
#if CV_SIMD_WIDTH == 32
1444
TheTest<v_float32>()
1445
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
1446
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
1447
;
1448
#endif
1449
}
1450
1451
void test_hal_intrin_float64()
1452
{
1453
DUMP_ENTRY(v_float64);
1454
#if CV_SIMD_64F
1455
TheTest<v_float64>()
1456
.test_loadstore()
1457
.test_addsub()
1458
.test_mul()
1459
.test_div()
1460
.test_cmp()
1461
.test_sqrt_abs()
1462
.test_min_max()
1463
.test_float_absdiff()
1464
.test_mask()
1465
.test_unpack()
1466
.test_float_math()
1467
.test_float_cvt32()
1468
.test_extract<0>().test_extract<1>()
1469
.test_rotate<0>().test_rotate<1>()
1470
;
1471
1472
#if CV_SIMD_WIDTH == 32
1473
TheTest<v_float64>()
1474
.test_extract<2>().test_extract<3>()
1475
.test_rotate<2>().test_rotate<3>()
1476
;
1477
#endif //CV_SIMD256
1478
1479
#endif
1480
}
1481
1482
#if CV_FP16
// Runs the half-precision checks. The whole definition is only compiled when
// CV_FP16 is enabled, so the fp16 <-> f32 load/store test needs no inner guard.
void test_hal_intrin_float16()
{
    DUMP_ENTRY(v_float16);

    TheTest<v_float32> f32Suite = TheTest<v_float32>();
    f32Suite.test_loadstore_fp16_f32();

#if CV_SIMD_FP16
    // NOTE(review): test_loadstore_fp16() and test_float_cvt_fp16() appear to be
    // defined only inside an "#if 0" region of TheTest; confirm they are enabled
    // before building with CV_SIMD_FP16.
    TheTest<v_float16> f16Suite = TheTest<v_float16>();
    f16Suite.test_loadstore_fp16();
    f16Suite.test_float_cvt_fp16();
#endif
}
#endif // CV_FP16
1497
1498
/*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
1499
void test_hal_intrin_float16()
1500
{
1501
TheTest<v_float16>()
1502
.test_loadstore_fp16()
1503
.test_float_cvt_fp16()
1504
;
1505
}
1506
#endif*/
1507
1508
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
1509
1510
//CV_CPU_OPTIMIZATION_NAMESPACE_END
1511
//}}} // namespace
1512
1513