Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/arithm_core.hpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16
// Copyright (C) 2015, Itseez Inc., all rights reserved.
17
// Third party copyrights are property of their respective owners.
18
//
19
// Redistribution and use in source and binary forms, with or without modification,
20
// are permitted provided that the following conditions are met:
21
//
22
// * Redistribution's of source code must retain the above copyright notice,
23
// this list of conditions and the following disclaimer.
24
//
25
// * Redistribution's in binary form must reproduce the above copyright notice,
26
// this list of conditions and the following disclaimer in the documentation
27
// and/or other materials provided with the distribution.
28
//
29
// * The name of the copyright holders may not be used to endorse or promote products
30
// derived from this software without specific prior written permission.
31
//
32
// This software is provided by the copyright holders and contributors "as is" and
33
// any express or implied warranties, including, but not limited to, the implied
34
// warranties of merchantability and fitness for a particular purpose are disclaimed.
35
// In no event shall the Intel Corporation or contributors be liable for any direct,
36
// indirect, incidental, special, exemplary, or consequential damages
37
// (including, but not limited to, procurement of substitute goods or services;
38
// loss of use, data, or profits; or business interruption) however caused
39
// and on any theory of liability, whether in contract, strict liability,
40
// or tort (including negligence or otherwise) arising in any way out of
41
// the use of this software, even if advised of the possibility of such damage.
42
//
43
//M*/
44
45
#ifndef __OPENCV_ARITHM_CORE_HPP__
46
#define __OPENCV_ARITHM_CORE_HPP__
47
48
#include "arithm_simd.hpp"
49
50
namespace cv {
51
52
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
53
{
54
typedef T1 type1;
55
typedef T2 type2;
56
typedef T3 rtype;
57
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
58
};
59
60
template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
61
{
62
typedef T1 type1;
63
typedef T2 type2;
64
typedef T3 rtype;
65
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
66
};
67
68
template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
69
{
70
typedef T1 type1;
71
typedef T2 type2;
72
typedef T3 rtype;
73
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
74
};
75
76
// Element-wise minimum functor (identical semantics to std::min: the second
// argument is returned only when strictly smaller).
template<typename T> struct OpMin
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator ()(const T lhs, const T rhs) const { return rhs < lhs ? rhs : lhs; }
};
83
84
// Element-wise maximum functor (identical semantics to std::max: the second
// argument is returned only when strictly greater).
template<typename T> struct OpMax
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator ()(const T lhs, const T rhs) const { return lhs < rhs ? rhs : lhs; }
};
91
92
// Absolute difference for integer types.  The branch always subtracts the
// smaller value from the larger, so the intermediate difference cannot
// wrap around for unsigned T.
template<typename T> struct OpAbsDiff
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()(T x, T y) const { return y < x ? x - y : y - x; }
};
99
100
// specializations to prevent "-0" results
101
template<> struct OpAbsDiff<float>
102
{
103
typedef float type1;
104
typedef float type2;
105
typedef float rtype;
106
float operator()(float a, float b) const { return std::abs(a - b); }
107
};
108
template<> struct OpAbsDiff<double>
109
{
110
typedef double type1;
111
typedef double type2;
112
typedef double rtype;
113
double operator()(double a, double b) const { return std::abs(a - b); }
114
};
115
116
// Bitwise AND functor for integer element types.
template<typename T> struct OpAnd
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T x, T y ) const { return x & y; }
};
123
124
// Bitwise OR functor for integer element types.
template<typename T> struct OpOr
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T x, T y ) const { return x | y; }
};
131
132
// Bitwise XOR functor for integer element types.
template<typename T> struct OpXor
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T x, T y ) const { return x ^ y; }
};
139
140
// Bitwise NOT functor.  The ignored second operand lets it plug into the same
// binary-operation kernels as the two-argument functors above.
template<typename T> struct OpNot
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T v, T ) const { return ~v; }
};
147
148
//=============================================================================
149
150
// Row-by-row element-wise binary operation: dst = Op(src1, src2).
// Op is the scalar functor; VOp is its vector counterpart (arithm_simd.hpp).
// step1/step2/step are row strides in BYTES (hence the uchar* arithmetic when
// advancing rows); width is in elements, height in rows.
// Per row: widest available SIMD path first, then narrower tails, then scalar.
template<typename T, class Op, class VOp>
void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, int width, int height)
{
#if CV_SSE2 || CV_NEON
    VOp vop;
#endif
    Op op;

    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
                        src2 = (const T *)((const uchar *)src2 + step2),
                        dst = (T *)((uchar *)dst + step) )
    {
        int x = 0;

#if CV_NEON || CV_SSE2
#if CV_AVX2
        // 256-bit path: one 32-byte register per iteration.
        if( USE_AVX2 )
        {
            for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
            {
                typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
                r0 = vop(r0, VLoadStore256<T>::load(src2 + x));
                VLoadStore256<T>::store(dst + x, r0);
            }
        }
#else
#if CV_SSE2
        if( USE_SSE2 )
        {
#endif // CV_SSE2
            // 128-bit path: two 16-byte registers (32 bytes) per iteration.
            // On NEON the USE_SSE2 guard is compiled out, so this always runs.
            for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
            {
                typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x               );
                typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 16/sizeof(T));
                r0 = vop(r0, VLoadStore128<T>::load(src2 + x               ));
                r1 = vop(r1, VLoadStore128<T>::load(src2 + x + 16/sizeof(T)));
                VLoadStore128<T>::store(dst + x               , r0);
                VLoadStore128<T>::store(dst + x + 16/sizeof(T), r1);
            }
#if CV_SSE2
        }
#endif // CV_SSE2
#endif // CV_AVX2
#endif // CV_NEON || CV_SSE2

#if CV_AVX2
        // nothing
#elif CV_SSE2
        // 64-bit tail: mop up one 8-byte chunk at a time (SSE2-only builds).
        if( USE_SSE2 )
        {
            for( ; x <= width - 8/(int)sizeof(T); x += 8/sizeof(T) )
            {
                typename VLoadStore64<T>::reg_type r = VLoadStore64<T>::load(src1 + x);
                r = vop(r, VLoadStore64<T>::load(src2 + x));
                VLoadStore64<T>::store(dst + x, r);
            }
        }
#endif

#if CV_ENABLE_UNROLLED
        // Scalar loop manually unrolled by four.
        for( ; x <= width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif

        // Scalar remainder.
        for( ; x < width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
225
226
// vBinOp variant for 32-bit element types (the SIMD loops step 4 or 8 elements
// per 128/256-bit register).  Adds aligned load/store fast paths when src1,
// src2 and dst are all suitably aligned, then falls back to unaligned SIMD,
// an unrolled scalar loop, and a scalar remainder.  Strides are in bytes.
template<typename T, class Op, class Op32>
void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2,
              T* dst, size_t step, int width, int height)
{
#if CV_SSE2 || CV_NEON
    Op32 op32;
#endif
    Op op;

    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
                        src2 = (const T *)((const uchar *)src2 + step2),
                        dst = (T *)((uchar *)dst + step) )
    {
        int x = 0;

#if CV_AVX2
        if( USE_AVX2 )
        {
            // Aligned 256-bit path: requires 32-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
            {
                for( ; x <= width - 8; x += 8 )
                {
                    typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
                    r0 = op32(r0, VLoadStore256Aligned<T>::load(src2 + x));
                    VLoadStore256Aligned<T>::store(dst + x, r0);
                }
            }
        }
#elif CV_SSE2
        if( USE_SSE2 )
        {
            // Aligned 128-bit path: requires 16-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
            {
                for( ; x <= width - 8; x += 8 )
                {
                    typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x    );
                    typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 4);
                    r0 = op32(r0, VLoadStore128Aligned<T>::load(src2 + x    ));
                    r1 = op32(r1, VLoadStore128Aligned<T>::load(src2 + x + 4));
                    VLoadStore128Aligned<T>::store(dst + x    , r0);
                    VLoadStore128Aligned<T>::store(dst + x + 4, r1);
                }
            }
        }
#endif // CV_AVX2

#if CV_NEON || CV_SSE2
#if CV_AVX2
        // Unaligned 256-bit path handles whatever the aligned loop did not.
        if( USE_AVX2 )
        {
            for( ; x <= width - 8; x += 8 )
            {
                typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
                r0 = op32(r0, VLoadStore256<T>::load(src2 + x));
                VLoadStore256<T>::store(dst + x, r0);
            }
        }
#else
#if CV_SSE2
        if( USE_SSE2 )
        {
#endif // CV_SSE2
            // Unaligned 128-bit path (always taken on NEON, where the
            // USE_SSE2 guard above is compiled out).
            for( ; x <= width - 8; x += 8 )
            {
                typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x    );
                typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 4);
                r0 = op32(r0, VLoadStore128<T>::load(src2 + x    ));
                r1 = op32(r1, VLoadStore128<T>::load(src2 + x + 4));
                VLoadStore128<T>::store(dst + x    , r0);
                VLoadStore128<T>::store(dst + x + 4, r1);
            }
#if CV_SSE2
        }
#endif // CV_SSE2
#endif // CV_AVX2
#endif // CV_NEON || CV_SSE2

#if CV_ENABLE_UNROLLED
        // Scalar loop manually unrolled by four.
        for( ; x <= width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif

        // Scalar remainder.
        for( ; x < width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
319
320
321
// vBinOp variant for 64-bit element types (the SIMD loops step 2 or 4 elements
// per 128/256-bit register).  Only ALIGNED SSE2/AVX2 paths are provided; any
// unaligned data falls through to the scalar loops.  Strides are in bytes.
template<typename T, class Op, class Op64>
void vBinOp64(const T* src1, size_t step1, const T* src2, size_t step2,
              T* dst, size_t step, int width, int height)
{
#if CV_SSE2
    Op64 op64;
#endif
    Op op;

    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
                        src2 = (const T *)((const uchar *)src2 + step2),
                        dst = (T *)((uchar *)dst + step) )
    {
        int x = 0;

#if CV_AVX2
        if( USE_AVX2 )
        {
            // Aligned 256-bit path: requires 32-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
            {
                for( ; x <= width - 4; x += 4 )
                {
                    typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
                    r0 = op64(r0, VLoadStore256Aligned<T>::load(src2 + x));
                    VLoadStore256Aligned<T>::store(dst + x, r0);
                }
            }
        }
#elif CV_SSE2
        if( USE_SSE2 )
        {
            // Aligned 128-bit path: requires 16-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
            {
                for( ; x <= width - 4; x += 4 )
                {
                    typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x    );
                    typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 2);
                    r0 = op64(r0, VLoadStore128Aligned<T>::load(src2 + x    ));
                    r1 = op64(r1, VLoadStore128Aligned<T>::load(src2 + x + 2));
                    VLoadStore128Aligned<T>::store(dst + x    , r0);
                    VLoadStore128Aligned<T>::store(dst + x + 2, r1);
                }
            }
        }
#endif

        // Scalar loop unrolled by four (unconditional here, unlike the
        // CV_ENABLE_UNROLLED-gated loops in vBinOp/vBinOp32).
        for( ; x <= width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        // Scalar remainder.
        for( ; x < width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
381
382
// Element-wise comparison producing an 8-bit mask: dst[x] = 255 where the
// predicate holds, 0 otherwise.  Only GT/LE and EQ/NE are implemented
// directly; GE and LT are first reduced to LE/GT by swapping the source
// arrays.  step1/step2 are converted from bytes to element counts; step is
// used as-is because dst is uchar (1 byte per element).
template<typename T> static void
cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
     uchar* dst, size_t step, int width, int height, int code)
{
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        // a >= b  <=>  b <= a,   a < b  <=>  b > a
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    Cmp_SIMD<T> vop(code);

    if( code == CMP_GT || code == CMP_LE )
    {
        // -(cond) is 0 or -1 (all bits set); XOR with m (0 for GT, 255 for LE)
        // leaves or inverts the low byte, so one loop serves both codes.
        int m = code == CMP_GT ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            // vop handles the SIMD prefix of the row and returns the first
            // unprocessed index.
            int x = vop(src1, src2, dst, width);
#if CV_ENABLE_UNROLLED
            for( ; x <= width - 4; x += 4 )
            {
                int t0, t1;
                t0 = -(src1[x] > src2[x]) ^ m;
                t1 = -(src1[x+1] > src2[x+1]) ^ m;
                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
                t0 = -(src1[x+2] > src2[x+2]) ^ m;
                t1 = -(src1[x+3] > src2[x+3]) ^ m;
                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
            }
#endif
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        // Same mask trick with the equality predicate (m = 255 inverts for NE).
        // Note: this branch is scalar only — x starts at 0 and vop is unused.
        int m = code == CMP_EQ ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_ENABLE_UNROLLED
            for( ; x <= width - 4; x += 4 )
            {
                int t0, t1;
                t0 = -(src1[x] == src2[x]) ^ m;
                t1 = -(src1[x+1] == src2[x+1]) ^ m;
                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
                t0 = -(src1[x+2] == src2[x+2]) ^ m;
                t1 = -(src1[x+3] == src2[x+3]) ^ m;
                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
            }
#endif
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
442
443
template<typename T, typename WT> static void
444
mul_( const T* src1, size_t step1, const T* src2, size_t step2,
445
T* dst, size_t step, int width, int height, WT scale )
446
{
447
step1 /= sizeof(src1[0]);
448
step2 /= sizeof(src2[0]);
449
step /= sizeof(dst[0]);
450
451
Mul_SIMD<T, WT> vop;
452
453
if( scale == (WT)1. )
454
{
455
for( ; height--; src1 += step1, src2 += step2, dst += step )
456
{
457
int i = vop(src1, src2, dst, width, scale);
458
#if CV_ENABLE_UNROLLED
459
for(; i <= width - 4; i += 4 )
460
{
461
T t0;
462
T t1;
463
t0 = saturate_cast<T>(src1[i ] * src2[i ]);
464
t1 = saturate_cast<T>(src1[i+1] * src2[i+1]);
465
dst[i ] = t0;
466
dst[i+1] = t1;
467
468
t0 = saturate_cast<T>(src1[i+2] * src2[i+2]);
469
t1 = saturate_cast<T>(src1[i+3] * src2[i+3]);
470
dst[i+2] = t0;
471
dst[i+3] = t1;
472
}
473
#endif
474
for( ; i < width; i++ )
475
dst[i] = saturate_cast<T>(src1[i] * src2[i]);
476
}
477
}
478
else
479
{
480
for( ; height--; src1 += step1, src2 += step2, dst += step )
481
{
482
int i = vop(src1, src2, dst, width, scale);
483
#if CV_ENABLE_UNROLLED
484
for(; i <= width - 4; i += 4 )
485
{
486
T t0 = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
487
T t1 = saturate_cast<T>(scale*(WT)src1[i+1]*src2[i+1]);
488
dst[i] = t0; dst[i+1] = t1;
489
490
t0 = saturate_cast<T>(scale*(WT)src1[i+2]*src2[i+2]);
491
t1 = saturate_cast<T>(scale*(WT)src1[i+3]*src2[i+3]);
492
dst[i+2] = t0; dst[i+3] = t1;
493
}
494
#endif
495
for( ; i < width; i++ )
496
dst[i] = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
497
}
498
}
499
}
500
501
502
template<typename T> static void
503
div_i( const T* src1, size_t step1, const T* src2, size_t step2,
504
T* dst, size_t step, int width, int height, double scale )
505
{
506
step1 /= sizeof(src1[0]);
507
step2 /= sizeof(src2[0]);
508
step /= sizeof(dst[0]);
509
510
Div_SIMD<T> vop;
511
float scale_f = (float)scale;
512
513
for( ; height--; src1 += step1, src2 += step2, dst += step )
514
{
515
int i = vop(src1, src2, dst, width, scale);
516
for( ; i < width; i++ )
517
{
518
T num = src1[i], denom = src2[i];
519
T v = 0;
520
if (denom != 0)
521
v = saturate_cast<T>(num*scale_f/denom);
522
dst[i] = v;
523
}
524
}
525
}
526
527
template<typename T> static void
528
div_f( const T* src1, size_t step1, const T* src2, size_t step2,
529
T* dst, size_t step, int width, int height, double scale )
530
{
531
T scale_f = (T)scale;
532
step1 /= sizeof(src1[0]);
533
step2 /= sizeof(src2[0]);
534
step /= sizeof(dst[0]);
535
536
Div_SIMD<T> vop;
537
538
for( ; height--; src1 += step1, src2 += step2, dst += step )
539
{
540
int i = vop(src1, src2, dst, width, scale);
541
for( ; i < width; i++ )
542
{
543
T num = src1[i], denom = src2[i];
544
dst[i] = saturate_cast<T>(num*scale_f/denom);
545
}
546
}
547
}
548
549
template<typename T> static void
550
recip_i( const T* src2, size_t step2,
551
T* dst, size_t step, int width, int height, double scale )
552
{
553
step2 /= sizeof(src2[0]);
554
step /= sizeof(dst[0]);
555
556
Recip_SIMD<T> vop;
557
float scale_f = (float)scale;
558
559
for( ; height--; src2 += step2, dst += step )
560
{
561
int i = vop(src2, dst, width, scale);
562
for( ; i < width; i++ )
563
{
564
T denom = src2[i];
565
T v = 0;
566
if (denom != 0)
567
v = saturate_cast<T>(scale_f/denom);
568
dst[i] = v;
569
}
570
}
571
}
572
573
template<typename T> static void
574
recip_f( const T* src2, size_t step2,
575
T* dst, size_t step, int width, int height, double scale )
576
{
577
T scale_f = (T)scale;
578
step2 /= sizeof(src2[0]);
579
step /= sizeof(dst[0]);
580
581
Recip_SIMD<T> vop;
582
583
for( ; height--; src2 += step2, dst += step )
584
{
585
int i = vop(src2, dst, width, scale);
586
for( ; i < width; i++ )
587
{
588
T denom = src2[i];
589
dst[i] = saturate_cast<T>(scale_f/denom);
590
}
591
}
592
}
593
594
template<typename T, typename WT> static void
595
addWeighted_( const T* src1, size_t step1, const T* src2, size_t step2,
596
T* dst, size_t step, int width, int height, void* _scalars )
597
{
598
const double* scalars = (const double*)_scalars;
599
WT alpha = (WT)scalars[0], beta = (WT)scalars[1], gamma = (WT)scalars[2];
600
step1 /= sizeof(src1[0]);
601
step2 /= sizeof(src2[0]);
602
step /= sizeof(dst[0]);
603
604
AddWeighted_SIMD<T, WT> vop;
605
606
for( ; height--; src1 += step1, src2 += step2, dst += step )
607
{
608
int x = vop(src1, src2, dst, width, alpha, beta, gamma);
609
#if CV_ENABLE_UNROLLED
610
for( ; x <= width - 4; x += 4 )
611
{
612
T t0 = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
613
T t1 = saturate_cast<T>(src1[x+1]*alpha + src2[x+1]*beta + gamma);
614
dst[x] = t0; dst[x+1] = t1;
615
616
t0 = saturate_cast<T>(src1[x+2]*alpha + src2[x+2]*beta + gamma);
617
t1 = saturate_cast<T>(src1[x+3]*alpha + src2[x+3]*beta + gamma);
618
dst[x+2] = t0; dst[x+3] = t1;
619
}
620
#endif
621
for( ; x < width; x++ )
622
dst[x] = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
623
}
624
}
625
626
} // cv::
627
628
629
#endif // __OPENCV_ARITHM_CORE_HPP__
630
631