Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/arithm_core.hpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16
// Copyright (C) 2015, Itseez Inc., all rights reserved.
17
// Third party copyrights are property of their respective owners.
18
//
19
// Redistribution and use in source and binary forms, with or without modification,
20
// are permitted provided that the following conditions are met:
21
//
22
// * Redistribution's of source code must retain the above copyright notice,
23
// this list of conditions and the following disclaimer.
24
//
25
// * Redistribution's in binary form must reproduce the above copyright notice,
26
// this list of conditions and the following disclaimer in the documentation
27
// and/or other materials provided with the distribution.
28
//
29
// * The name of the copyright holders may not be used to endorse or promote products
30
// derived from this software without specific prior written permission.
31
//
32
// This software is provided by the copyright holders and contributors "as is" and
33
// any express or implied warranties, including, but not limited to, the implied
34
// warranties of merchantability and fitness for a particular purpose are disclaimed.
35
// In no event shall the Intel Corporation or contributors be liable for any direct,
36
// indirect, incidental, special, exemplary, or consequential damages
37
// (including, but not limited to, procurement of substitute goods or services;
38
// loss of use, data, or profits; or business interruption) however caused
39
// and on any theory of liability, whether in contract, strict liability,
40
// or tort (including negligence or otherwise) arising in any way out of
41
// the use of this software, even if advised of the possibility of such damage.
42
//
43
//M*/
44
45
#ifndef __OPENCV_ARITHM_CORE_HPP__
46
#define __OPENCV_ARITHM_CORE_HPP__
47
48
#include "arithm_simd.hpp"
49
50
namespace cv {
51
52
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
53
{
54
typedef T1 type1;
55
typedef T2 type2;
56
typedef T3 rtype;
57
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
58
};
59
60
template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
61
{
62
typedef T1 type1;
63
typedef T2 type2;
64
typedef T3 rtype;
65
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
66
};
67
68
template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
69
{
70
typedef T1 type1;
71
typedef T2 type2;
72
typedef T3 rtype;
73
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
74
};
75
76
// Element-wise minimum functor (identical semantics to std::min: the second
// argument is returned only when strictly smaller).
template<typename T> struct OpMin
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator ()(const T lhs, const T rhs) const { return rhs < lhs ? rhs : lhs; }
};
83
84
// Element-wise maximum functor (identical semantics to std::max: the second
// argument is returned only when strictly greater).
template<typename T> struct OpMax
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator ()(const T lhs, const T rhs) const { return lhs < rhs ? rhs : lhs; }
};
91
92
// Absolute difference for integer types.  The branch always subtracts the
// smaller value from the larger, so the intermediate difference cannot
// wrap around for unsigned T.
template<typename T> struct OpAbsDiff
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()(T x, T y) const { return y < x ? x - y : y - x; }
};
99
100
// specializations to prevent "-0" results
101
template<> struct OpAbsDiff<float>
102
{
103
typedef float type1;
104
typedef float type2;
105
typedef float rtype;
106
float operator()(float a, float b) const { return std::abs(a - b); }
107
};
108
template<> struct OpAbsDiff<double>
109
{
110
typedef double type1;
111
typedef double type2;
112
typedef double rtype;
113
double operator()(double a, double b) const { return std::abs(a - b); }
114
};
115
116
// Bitwise AND functor for integer element types.
template<typename T> struct OpAnd
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T x, T y ) const { return x & y; }
};
123
124
// Bitwise OR functor for integer element types.
template<typename T> struct OpOr
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T x, T y ) const { return x | y; }
};
131
132
// Bitwise XOR functor for integer element types.
template<typename T> struct OpXor
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T x, T y ) const { return x ^ y; }
};
139
140
// Bitwise NOT functor.  The ignored second operand lets it plug into the same
// binary-operation kernels as the two-argument functors above.
template<typename T> struct OpNot
{
    typedef T type1;
    typedef T type2;
    typedef T rtype;
    T operator()( T v, T ) const { return ~v; }
};
147
148
//=============================================================================
149
150
// Row-by-row element-wise binary operation: dst = Op(src1, src2).
// Op is the scalar functor; VOp is its vector counterpart (arithm_simd.hpp).
// step1/step2/step are row strides in BYTES (hence the uchar* arithmetic when
// advancing rows); width is in elements, height in rows.
// Per row: widest available SIMD path first, then narrower tails, then scalar.
template<typename T, class Op, class VOp>
void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, int width, int height)
{
#if CV_SSE2 || CV_NEON
    VOp vop;
#endif
    Op op;

    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
                        src2 = (const T *)((const uchar *)src2 + step2),
                        dst = (T *)((uchar *)dst + step) )
    {
        int x = 0;

#if CV_NEON || CV_SSE2
#if CV_AVX2
        // 256-bit path: one 32-byte register per iteration.
        if( USE_AVX2 )
        {
            for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
            {
                typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
                r0 = vop(r0, VLoadStore256<T>::load(src2 + x));
                VLoadStore256<T>::store(dst + x, r0);
            }
        }
#else
#if CV_SSE2
        if( USE_SSE2 )
        {
#endif // CV_SSE2
            // 128-bit path: two 16-byte registers (32 bytes) per iteration.
            // On NEON the USE_SSE2 guard is compiled out, so this always runs.
            for( ; x <= width - 32/(int)sizeof(T); x += 32/sizeof(T) )
            {
                typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x               );
                typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 16/sizeof(T));
                r0 = vop(r0, VLoadStore128<T>::load(src2 + x               ));
                r1 = vop(r1, VLoadStore128<T>::load(src2 + x + 16/sizeof(T)));
                VLoadStore128<T>::store(dst + x               , r0);
                VLoadStore128<T>::store(dst + x + 16/sizeof(T), r1);
            }
#if CV_SSE2
        }
#endif // CV_SSE2
#endif // CV_AVX2
#endif // CV_NEON || CV_SSE2

#if CV_AVX2
        // nothing
#elif CV_SSE2
        // 64-bit tail: mop up one 8-byte chunk at a time (SSE2-only builds).
        if( USE_SSE2 )
        {
            for( ; x <= width - 8/(int)sizeof(T); x += 8/sizeof(T) )
            {
                typename VLoadStore64<T>::reg_type r = VLoadStore64<T>::load(src1 + x);
                r = vop(r, VLoadStore64<T>::load(src2 + x));
                VLoadStore64<T>::store(dst + x, r);
            }
        }
#endif

#if CV_ENABLE_UNROLLED
        // Scalar loop manually unrolled by four.
        for( ; x <= width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif

        // Scalar remainder.
        for( ; x < width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
225
226
// vBinOp variant for 32-bit element types (the SIMD loops step 4 or 8 elements
// per 128/256-bit register).  Adds aligned load/store fast paths when src1,
// src2 and dst are all suitably aligned, then falls back to unaligned SIMD,
// an unrolled scalar loop, and a scalar remainder.  Strides are in bytes.
template<typename T, class Op, class Op32>
void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2,
              T* dst, size_t step, int width, int height)
{
#if CV_SSE2 || CV_NEON
    Op32 op32;
#endif
    Op op;

    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
                        src2 = (const T *)((const uchar *)src2 + step2),
                        dst = (T *)((uchar *)dst + step) )
    {
        int x = 0;

#if CV_AVX2
        if( USE_AVX2 )
        {
            // Aligned 256-bit path: requires 32-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
            {
                for( ; x <= width - 8; x += 8 )
                {
                    typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
                    r0 = op32(r0, VLoadStore256Aligned<T>::load(src2 + x));
                    VLoadStore256Aligned<T>::store(dst + x, r0);
                }
            }
        }
#elif CV_SSE2
        if( USE_SSE2 )
        {
            // Aligned 128-bit path: requires 16-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
            {
                for( ; x <= width - 8; x += 8 )
                {
                    typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x    );
                    typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 4);
                    r0 = op32(r0, VLoadStore128Aligned<T>::load(src2 + x    ));
                    r1 = op32(r1, VLoadStore128Aligned<T>::load(src2 + x + 4));
                    VLoadStore128Aligned<T>::store(dst + x    , r0);
                    VLoadStore128Aligned<T>::store(dst + x + 4, r1);
                }
            }
        }
#endif // CV_AVX2

#if CV_NEON || CV_SSE2
#if CV_AVX2
        // Unaligned 256-bit path handles whatever the aligned loop did not.
        if( USE_AVX2 )
        {
            for( ; x <= width - 8; x += 8 )
            {
                typename VLoadStore256<T>::reg_type r0 = VLoadStore256<T>::load(src1 + x);
                r0 = op32(r0, VLoadStore256<T>::load(src2 + x));
                VLoadStore256<T>::store(dst + x, r0);
            }
        }
#else
#if CV_SSE2
        if( USE_SSE2 )
        {
#endif // CV_SSE2
            // Unaligned 128-bit path (always taken on NEON, where the
            // USE_SSE2 guard above is compiled out).
            for( ; x <= width - 8; x += 8 )
            {
                typename VLoadStore128<T>::reg_type r0 = VLoadStore128<T>::load(src1 + x    );
                typename VLoadStore128<T>::reg_type r1 = VLoadStore128<T>::load(src1 + x + 4);
                r0 = op32(r0, VLoadStore128<T>::load(src2 + x    ));
                r1 = op32(r1, VLoadStore128<T>::load(src2 + x + 4));
                VLoadStore128<T>::store(dst + x    , r0);
                VLoadStore128<T>::store(dst + x + 4, r1);
            }
#if CV_SSE2
        }
#endif // CV_SSE2
#endif // CV_AVX2
#endif // CV_NEON || CV_SSE2

#if CV_ENABLE_UNROLLED
        // Scalar loop manually unrolled by four.
        for( ; x <= width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif

        // Scalar remainder.
        for( ; x < width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
319
320
321
// vBinOp variant for 64-bit element types (the SIMD loops step 2 or 4 elements
// per 128/256-bit register).  Only ALIGNED SSE2/AVX2 paths are provided; any
// unaligned data falls through to the scalar loops.  Strides are in bytes.
template<typename T, class Op, class Op64>
void vBinOp64(const T* src1, size_t step1, const T* src2, size_t step2,
              T* dst, size_t step, int width, int height)
{
#if CV_SSE2
    Op64 op64;
#endif
    Op op;

    for( ; height--; src1 = (const T *)((const uchar *)src1 + step1),
                        src2 = (const T *)((const uchar *)src2 + step2),
                        dst = (T *)((uchar *)dst + step) )
    {
        int x = 0;

#if CV_AVX2
        if( USE_AVX2 )
        {
            // Aligned 256-bit path: requires 32-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&31) == 0 )
            {
                for( ; x <= width - 4; x += 4 )
                {
                    typename VLoadStore256Aligned<T>::reg_type r0 = VLoadStore256Aligned<T>::load(src1 + x);
                    r0 = op64(r0, VLoadStore256Aligned<T>::load(src2 + x));
                    VLoadStore256Aligned<T>::store(dst + x, r0);
                }
            }
        }
#elif CV_SSE2
        if( USE_SSE2 )
        {
            // Aligned 128-bit path: requires 16-byte alignment of all three pointers.
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
            {
                for( ; x <= width - 4; x += 4 )
                {
                    typename VLoadStore128Aligned<T>::reg_type r0 = VLoadStore128Aligned<T>::load(src1 + x    );
                    typename VLoadStore128Aligned<T>::reg_type r1 = VLoadStore128Aligned<T>::load(src1 + x + 2);
                    r0 = op64(r0, VLoadStore128Aligned<T>::load(src2 + x    ));
                    r1 = op64(r1, VLoadStore128Aligned<T>::load(src2 + x + 2));
                    VLoadStore128Aligned<T>::store(dst + x    , r0);
                    VLoadStore128Aligned<T>::store(dst + x + 2, r1);
                }
            }
        }
#endif

        // Scalar loop unrolled by four (unconditional here, unlike the
        // CV_ENABLE_UNROLLED-gated loops in vBinOp/vBinOp32).
        for( ; x <= width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        // Scalar remainder.
        for( ; x < width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
381
382
// Element-wise comparison producing an 8-bit mask: dst[x] = 255 where the
// predicate holds, 0 otherwise.  Only GT/LE and EQ/NE are implemented
// directly; GE and LT are first reduced to LE/GT by swapping the source
// arrays.  step1/step2 are converted from bytes to element counts; step is
// used as-is because dst is uchar (1 byte per element).
template<typename T> static void
cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
     uchar* dst, size_t step, int width, int height, int code)
{
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        // a >= b  <=>  b <= a,   a < b  <=>  b > a
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    Cmp_SIMD<T> vop(code);

    if( code == CMP_GT || code == CMP_LE )
    {
        // -(cond) is 0 or -1 (all bits set); XOR with m (0 for GT, 255 for LE)
        // leaves or inverts the low byte, so one loop serves both codes.
        int m = code == CMP_GT ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            // vop handles the SIMD prefix of the row and returns the first
            // unprocessed index.
            int x = vop(src1, src2, dst, width);
#if CV_ENABLE_UNROLLED
            for( ; x <= width - 4; x += 4 )
            {
                int t0, t1;
                t0 = -(src1[x] > src2[x]) ^ m;
                t1 = -(src1[x+1] > src2[x+1]) ^ m;
                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
                t0 = -(src1[x+2] > src2[x+2]) ^ m;
                t1 = -(src1[x+3] > src2[x+3]) ^ m;
                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
            }
#endif
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        // Same mask trick with the equality predicate (m = 255 inverts for NE).
        // Note: this branch is scalar only — x starts at 0 and vop is unused.
        int m = code == CMP_EQ ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_ENABLE_UNROLLED
            for( ; x <= width - 4; x += 4 )
            {
                int t0, t1;
                t0 = -(src1[x] == src2[x]) ^ m;
                t1 = -(src1[x+1] == src2[x+1]) ^ m;
                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
                t0 = -(src1[x+2] == src2[x+2]) ^ m;
                t1 = -(src1[x+3] == src2[x+3]) ^ m;
                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
            }
#endif
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
442
443
template<typename T, typename WT> static void
444
mul_( const T* src1, size_t step1, const T* src2, size_t step2,
445
T* dst, size_t step, int width, int height, WT scale )
446
{
447
step1 /= sizeof(src1[0]);
448
step2 /= sizeof(src2[0]);
449
step /= sizeof(dst[0]);
450
451
Mul_SIMD<T, WT> vop;
452
453
if( scale == (WT)1. )
454
{
455
for( ; height--; src1 += step1, src2 += step2, dst += step )
456
{
457
int i = vop(src1, src2, dst, width, scale);
458
#if CV_ENABLE_UNROLLED
459
for(; i <= width - 4; i += 4 )
460
{
461
T t0;
462
T t1;
463
t0 = saturate_cast<T>(src1[i ] * src2[i ]);
464
t1 = saturate_cast<T>(src1[i+1] * src2[i+1]);
465
dst[i ] = t0;
466
dst[i+1] = t1;
467
468
t0 = saturate_cast<T>(src1[i+2] * src2[i+2]);
469
t1 = saturate_cast<T>(src1[i+3] * src2[i+3]);
470
dst[i+2] = t0;
471
dst[i+3] = t1;
472
}
473
#endif
474
for( ; i < width; i++ )
475
dst[i] = saturate_cast<T>(src1[i] * src2[i]);
476
}
477
}
478
else
479
{
480
for( ; height--; src1 += step1, src2 += step2, dst += step )
481
{
482
int i = vop(src1, src2, dst, width, scale);
483
#if CV_ENABLE_UNROLLED
484
for(; i <= width - 4; i += 4 )
485
{
486
T t0 = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
487
T t1 = saturate_cast<T>(scale*(WT)src1[i+1]*src2[i+1]);
488
dst[i] = t0; dst[i+1] = t1;
489
490
t0 = saturate_cast<T>(scale*(WT)src1[i+2]*src2[i+2]);
491
t1 = saturate_cast<T>(scale*(WT)src1[i+3]*src2[i+3]);
492
dst[i+2] = t0; dst[i+3] = t1;
493
}
494
#endif
495
for( ; i < width; i++ )
496
dst[i] = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
497
}
498
}
499
}
500
501
502
template<typename T> static void
503
div_i( const T* src1, size_t step1, const T* src2, size_t step2,
504
T* dst, size_t step, int width, int height, double scale )
505
{
506
step1 /= sizeof(src1[0]);
507
step2 /= sizeof(src2[0]);
508
step /= sizeof(dst[0]);
509
510
Div_SIMD<T> vop;
511
float scale_f = (float)scale;
512
513
for( ; height--; src1 += step1, src2 += step2, dst += step )
514
{
515
int i = vop(src1, src2, dst, width, scale);
516
for( ; i < width; i++ )
517
{
518
T num = src1[i], denom = src2[i];
519
T v = 0;
520
if (denom != 0)
521
v = saturate_cast<T>(num*scale_f/denom);
522
dst[i] = v;
523
}
524
}
525
}
526
527
template<typename T> static void
528
div_f( const T* src1, size_t step1, const T* src2, size_t step2,
529
T* dst, size_t step, int width, int height, double scale )
530
{
531
T scale_f = (T)scale;
532
step1 /= sizeof(src1[0]);
533
step2 /= sizeof(src2[0]);
534
step /= sizeof(dst[0]);
535
536
Div_SIMD<T> vop;
537
538
for( ; height--; src1 += step1, src2 += step2, dst += step )
539
{
540
int i = vop(src1, src2, dst, width, scale);
541
for( ; i < width; i++ )
542
{
543
T num = src1[i], denom = src2[i];
544
dst[i] = saturate_cast<T>(num*scale_f/denom);
545
}
546
}
547
}
548
549
template<typename T> static void
550
recip_i( const T* src2, size_t step2,
551
T* dst, size_t step, int width, int height, double scale )
552
{
553
step2 /= sizeof(src2[0]);
554
step /= sizeof(dst[0]);
555
556
Recip_SIMD<T> vop;
557
float scale_f = (float)scale;
558
559
for( ; height--; src2 += step2, dst += step )
560
{
561
int i = vop(src2, dst, width, scale);
562
for( ; i < width; i++ )
563
{
564
T denom = src2[i];
565
T v = 0;
566
if (denom != 0)
567
v = saturate_cast<T>(scale_f/denom);
568
dst[i] = v;
569
}
570
}
571
}
572
573
template<typename T> static void
574
recip_f( const T* src2, size_t step2,
575
T* dst, size_t step, int width, int height, double scale )
576
{
577
T scale_f = (T)scale;
578
step2 /= sizeof(src2[0]);
579
step /= sizeof(dst[0]);
580
581
Recip_SIMD<T> vop;
582
583
for( ; height--; src2 += step2, dst += step )
584
{
585
int i = vop(src2, dst, width, scale);
586
for( ; i < width; i++ )
587
{
588
T denom = src2[i];
589
dst[i] = saturate_cast<T>(scale_f/denom);
590
}
591
}
592
}
593
594
template<typename T, typename WT> static void
595
addWeighted_( const T* src1, size_t step1, const T* src2, size_t step2,
596
T* dst, size_t step, int width, int height, void* _scalars )
597
{
598
const double* scalars = (const double*)_scalars;
599
WT alpha = (WT)scalars[0], beta = (WT)scalars[1], gamma = (WT)scalars[2];
600
step1 /= sizeof(src1[0]);
601
step2 /= sizeof(src2[0]);
602
step /= sizeof(dst[0]);
603
604
AddWeighted_SIMD<T, WT> vop;
605
606
for( ; height--; src1 += step1, src2 += step2, dst += step )
607
{
608
int x = vop(src1, src2, dst, width, alpha, beta, gamma);
609
#if CV_ENABLE_UNROLLED
610
for( ; x <= width - 4; x += 4 )
611
{
612
T t0 = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
613
T t1 = saturate_cast<T>(src1[x+1]*alpha + src2[x+1]*beta + gamma);
614
dst[x] = t0; dst[x+1] = t1;
615
616
t0 = saturate_cast<T>(src1[x+2]*alpha + src2[x+2]*beta + gamma);
617
t1 = saturate_cast<T>(src1[x+3]*alpha + src2[x+3]*beta + gamma);
618
dst[x+2] = t0; dst[x+3] = t1;
619
}
620
#endif
621
for( ; x < width; x++ )
622
dst[x] = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
623
}
624
}
625
626
} // cv::
627
628
629
#endif // __OPENCV_ARITHM_CORE_HPP__
630
631