Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/imgproc/src/imgwarp.cpp
16354 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16
// Third party copyrights are property of their respective owners.
17
//
18
// Redistribution and use in source and binary forms, with or without modification,
19
// are permitted provided that the following conditions are met:
20
//
21
// * Redistribution's of source code must retain the above copyright notice,
22
// this list of conditions and the following disclaimer.
23
//
24
// * Redistribution's in binary form must reproduce the above copyright notice,
25
// this list of conditions and the following disclaimer in the documentation
26
// and/or other materials provided with the distribution.
27
//
28
// * The name of the copyright holders may not be used to endorse or promote products
29
// derived from this software without specific prior written permission.
30
//
31
// This software is provided by the copyright holders and contributors "as is" and
32
// any express or implied warranties, including, but not limited to, the implied
33
// warranties of merchantability and fitness for a particular purpose are disclaimed.
34
// In no event shall the Intel Corporation or contributors be liable for any direct,
35
// indirect, incidental, special, exemplary, or consequential damages
36
// (including, but not limited to, procurement of substitute goods or services;
37
// loss of use, data, or profits; or business interruption) however caused
38
// and on any theory of liability, whether in contract, strict liability,
39
// or tort (including negligence or otherwise) arising in any way out of
40
// the use of this software, even if advised of the possibility of such damage.
41
//
42
//M*/
43
44
/* ////////////////////////////////////////////////////////////////////
45
//
46
// Geometrical transforms on images and matrices: rotation, zoom etc.
47
//
48
// */
49
50
#include "precomp.hpp"
51
#include "opencl_kernels_imgproc.hpp"
52
#include "hal_replacement.hpp"
53
#include <opencv2/core/utils/configuration.private.hpp>
54
#include "opencv2/core/hal/intrin.hpp"
55
#include "opencv2/core/openvx/ovx_defs.hpp"
56
#include "opencv2/core/softfloat.hpp"
57
#include "imgwarp.hpp"
58
59
using namespace cv;
60
61
namespace cv
62
{
63
64
#if defined (HAVE_IPP) && (!IPP_DISABLE_WARPAFFINE || !IPP_DISABLE_WARPPERSPECTIVE || !IPP_DISABLE_REMAP)
// Common signature shared by the multi-channel ippiSet_*_C3R/C4R functions so
// they can be passed around through a single function-pointer type.
typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);

// Fills an image region with a constant color through an IPP "set" function.
//
// @tparam channels  number of interleaved channels (3 or 4 in this file)
// @tparam Type      IPP pixel type (Ipp8u / Ipp16u / Ipp32f)
// @param value       border color; each channel is saturate-cast to Type
// @param dataPointer destination pixel buffer
// @param step        row stride in bytes
// @param size        region size passed straight to IPP
// @param func        the type-erased ippiSet_* function to invoke
// @return true when the IPP call reports success (status >= 0)
template <int channels, typename Type>
bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
{
    CV_INSTRUMENT_REGION_IPP();

    Type values[channels];
    for( int i = 0; i < channels; i++ )
        values[i] = saturate_cast<Type>(value[i]);
    return func(values, dataPointer, step, size) >= 0;
}
77
78
// Dispatches a constant-color fill to the matching IPP ippiSet_* variant for
// the given channel count (1, 3 or 4) and depth (CV_8U, CV_16U or CV_32F).
// Returns false for any unsupported channels/depth combination or when the
// underlying IPP call fails, so callers can fall back to a generic path.
static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
{
    CV_INSTRUMENT_REGION_IPP();

    if( channels == 1 )
    {
        switch( depth )
        {
        case CV_8U:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_8u_C1R, saturate_cast<Ipp8u>(value[0]), (Ipp8u *)dataPointer, step, size) >= 0;
        case CV_16U:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_16u_C1R, saturate_cast<Ipp16u>(value[0]), (Ipp16u *)dataPointer, step, size) >= 0;
        case CV_32F:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_32f_C1R, saturate_cast<Ipp32f>(value[0]), (Ipp32f *)dataPointer, step, size) >= 0;
        }
    }
    else
    {
        if( channels == 3 )
        {
            switch( depth )
            {
            case CV_8U:
                return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
            case CV_16U:
                return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
            case CV_32F:
                return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
            }
        }
        else if( channels == 4 )
        {
            switch( depth )
            {
            case CV_8U:
                return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
            case CV_16U:
                return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
            case CV_32F:
                return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
            }
        }
    }
    // Unsupported channels/depth combination.
    return false;
}
#endif
124
125
/************** interpolation formulas and tables ***************/
126
127
// Fixed-point precision of the integer interpolation weights used by remap:
// weights are stored as shorts scaled by 2^15.
const int INTER_REMAP_COEF_BITS=15;
const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;

// Per-subpixel-offset rounding deltas for nearest-neighbor remap
// (filled in initInterTab2D).
static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];

// 2x2 bilinear weight tables, float and fixed-point, one entry per
// (fy, fx) subpixel position.
static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
static short BilinearTab_i[INTER_TAB_SIZE2][2][2];

#if CV_SIMD128
// Bilinear weights replicated 4x for the vectorized 4-channel path;
// aligned to 16 bytes for SIMD loads.
static short BilinearTab_iC4_buf[INTER_TAB_SIZE2+2][2][8];
static short (*BilinearTab_iC4)[2][8] = (short (*)[2][8])alignPtr(BilinearTab_iC4_buf, 16);
#endif

// 4x4 bicubic weight tables.
static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
static short BicubicTab_i[INTER_TAB_SIZE2][4][4];

// 8x8 Lanczos-4 weight tables.
static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
145
146
// Computes the two 1-D linear interpolation weights for fractional offset x:
// coeffs[0] weights the left sample, coeffs[1] the right one.
static inline void interpolateLinear( float x, float* coeffs )
{
    coeffs[1] = x;
    coeffs[0] = 1.f - x;
}
151
152
// Computes the four 1-D cubic convolution weights for fractional offset x
// using the kernel with parameter A = -0.75. The last weight is derived so
// that the four coefficients sum to exactly 1.
static inline void interpolateCubic( float x, float* coeffs )
{
    const float A = -0.75f;
    const float xp = x + 1;   // distance to the leftmost sample
    const float xm = 1 - x;   // distance to the second-right sample

    coeffs[0] = ((A*xp - 5*A)*xp + 8*A)*xp - 4*A;
    coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
    coeffs[2] = ((A + 2)*xm - (A + 3))*xm*xm + 1;
    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
}
161
162
static inline void interpolateLanczos4( float x, float* coeffs )
163
{
164
static const double s45 = 0.70710678118654752440084436210485;
165
static const double cs[][2]=
166
{{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
167
168
if( x < FLT_EPSILON )
169
{
170
for( int i = 0; i < 8; i++ )
171
coeffs[i] = 0;
172
coeffs[3] = 1;
173
return;
174
}
175
176
float sum = 0;
177
double y0=-(x+3)*CV_PI*0.25, s0 = std::sin(y0), c0= std::cos(y0);
178
for(int i = 0; i < 8; i++ )
179
{
180
double y = -(x+3-i)*CV_PI*0.25;
181
coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
182
sum += coeffs[i];
183
}
184
185
sum = 1.f/sum;
186
for(int i = 0; i < 8; i++ )
187
coeffs[i] *= sum;
188
}
189
190
static void initInterTab1D(int method, float* tab, int tabsz)
191
{
192
float scale = 1.f/tabsz;
193
if( method == INTER_LINEAR )
194
{
195
for( int i = 0; i < tabsz; i++, tab += 2 )
196
interpolateLinear( i*scale, tab );
197
}
198
else if( method == INTER_CUBIC )
199
{
200
for( int i = 0; i < tabsz; i++, tab += 4 )
201
interpolateCubic( i*scale, tab );
202
}
203
else if( method == INTER_LANCZOS4 )
204
{
205
for( int i = 0; i < tabsz; i++, tab += 8 )
206
interpolateLanczos4( i*scale, tab );
207
}
208
else
209
CV_Error( CV_StsBadArg, "Unknown interpolation method" );
210
}
211
212
213
// Lazily builds the 2-D separable weight tables for the requested
// interpolation method and returns a pointer to either the float table
// (fixpt == false) or the fixed-point short table (fixpt == true).
// Each of the INTER_TAB_SIZE2 entries is the outer product of two 1-D
// kernel rows; the fixed-point entries are adjusted so every kernel sums
// to exactly INTER_REMAP_COEF_SCALE. Raises CV_StsBadArg for other methods.
// NOTE(review): the `inittab` guard is not synchronized — appears to rely on
// the static initializer below (initAllInterTab2D) running before threads
// start; confirm before calling concurrently with a new method.
static const void* initInterTab2D( int method, bool fixpt )
{
    static bool inittab[INTER_MAX+1] = {false};
    float* tab = 0;
    short* itab = 0;
    int ksize = 0;
    if( method == INTER_LINEAR )
        tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
    else if( method == INTER_CUBIC )
        tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
    else if( method == INTER_LANCZOS4 )
        tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
    else
        CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );

    if( !inittab[method] )
    {
        // 1-D kernel rows for all INTER_TAB_SIZE fractional offsets.
        AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
        int i, j, k1, k2;
        initInterTab1D(method, _tab.data(), INTER_TAB_SIZE);
        for( i = 0; i < INTER_TAB_SIZE; i++ )
            for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
            {
                int isum = 0;
                // Rounding direction for nearest-neighbor: 1 when the
                // fractional part is below one half.
                NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
                NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;

                // Outer product of the vertical (i) and horizontal (j) rows.
                for( k1 = 0; k1 < ksize; k1++ )
                {
                    float vy = _tab[i*ksize + k1];
                    for( k2 = 0; k2 < ksize; k2++ )
                    {
                        float v = vy*_tab[j*ksize + k2];
                        tab[k1*ksize + k2] = v;
                        isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
                    }
                }

                // Quantization may make the integer weights sum to slightly
                // more or less than the scale; push the difference into the
                // smallest (or largest) of the four central taps.
                if( isum != INTER_REMAP_COEF_SCALE )
                {
                    int diff = isum - INTER_REMAP_COEF_SCALE;
                    int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
                    for( k1 = ksize2; k1 < ksize2+2; k1++ )
                        for( k2 = ksize2; k2 < ksize2+2; k2++ )
                        {
                            if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
                                mk1 = k1, mk2 = k2;
                            else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
                                Mk1 = k1, Mk2 = k2;
                        }
                    if( diff < 0 )
                        itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
                    else
                        itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
                }
            }
        // Rewind the pointers to the start of the tables before returning.
        tab -= INTER_TAB_SIZE2*ksize*ksize;
        itab -= INTER_TAB_SIZE2*ksize*ksize;
#if CV_SIMD128
        // Replicate the 2x2 bilinear weights 4x for the vectorized
        // 4-channel remap path.
        if( method == INTER_LINEAR )
        {
            for( i = 0; i < INTER_TAB_SIZE2; i++ )
                for( j = 0; j < 4; j++ )
                {
                    BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
                    BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
                    BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
                    BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
                }
        }
#endif
        inittab[method] = true;
    }
    return fixpt ? (const void*)itab : (const void*)tab;
}
288
289
#ifndef __MINGW32__
// Eagerly builds every interpolation table (float and fixed-point for
// linear, cubic and Lanczos-4) at static-initialization time, so the lazy
// branch in initInterTab2D is already satisfied when the first remap runs.
static bool initAllInterTab2D()
{
    return  initInterTab2D( INTER_LINEAR, false ) &&
            initInterTab2D( INTER_LINEAR, true ) &&
            initInterTab2D( INTER_CUBIC, false ) &&
            initInterTab2D( INTER_CUBIC, true ) &&
            initInterTab2D( INTER_LANCZOS4, false ) &&
            initInterTab2D( INTER_LANCZOS4, true );
}

// The initializer of this global triggers the table construction;
// `volatile` keeps the call from being optimized away as unused.
static volatile bool doInitAllInterTab2D = initAllInterTab2D();
#endif
302
303
// Functor converting an accumulator value of type ST to the destination
// pixel type DT with saturation; used as the CastOp of the float remap paths.
template<typename ST, typename DT> struct Cast
{
    typedef ST type1;   // accumulator (working) type
    typedef DT rtype;   // destination pixel type

    DT operator()(ST val) const { return saturate_cast<DT>(val); }
};
310
311
// Functor converting a fixed-point accumulator of type ST (scaled by
// 2^bits) back to the destination pixel type DT, with round-to-nearest
// (adds DELTA = half an LSB before the shift) and saturation.
template<typename ST, typename DT, int bits> struct FixedPtCast
{
    typedef ST type1;   // fixed-point accumulator type
    typedef DT rtype;   // destination pixel type
    enum { SHIFT = bits, DELTA = 1 << (bits-1) };

    DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
};
319
320
// Clamps x to the half-open index range [a, b): values below a map to a,
// values at or above b map to b-1 (the last valid index).
static inline int clip(int x, int a, int b)
{
    if( x < a )
        return a;
    if( x >= b )
        return b - 1;
    return x;
}
324
325
/****************************************************************************************\
326
* General warping (affine, perspective, remap) *
327
\****************************************************************************************/
328
329
// Nearest-neighbor remap: for every destination pixel, copies the source
// pixel whose integer coordinates are given in the two-channel short map
// `_xy`. Out-of-range coordinates are resolved according to `borderType`
// (BORDER_CONSTANT uses `_borderValue`; BORDER_TRANSPARENT leaves the
// destination pixel untouched).
template<typename T>
static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
                          int borderType, const Scalar& _borderValue )
{
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const T* S0 = _src.ptr<T>();
    T cval[CV_CN_MAX];
    size_t sstep = _src.step/sizeof(S0[0]);

    // Border color; channels beyond 4 repeat the first four Scalar entries.
    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    unsigned width1 = ssize.width, height1 = ssize.height;

    // Process continuous images as a single long row.
    if( _dst.isContinuous() && _xy.isContinuous() )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);

        if( cn == 1 )
        {
            for(int dx = 0; dx < dsize.width; dx++ )
            {
                int sx = XY[dx*2], sy = XY[dx*2+1];
                // Unsigned compare folds the sx >= 0 && sx < width tests.
                if( (unsigned)sx < width1 && (unsigned)sy < height1 )
                    D[dx] = S0[sy*sstep + sx];
                else
                {
                    if( borderType == BORDER_REPLICATE )
                    {
                        sx = clip(sx, 0, ssize.width);
                        sy = clip(sy, 0, ssize.height);
                        D[dx] = S0[sy*sstep + sx];
                    }
                    else if( borderType == BORDER_CONSTANT )
                        D[dx] = cval[0];
                    else if( borderType != BORDER_TRANSPARENT )
                    {
                        sx = borderInterpolate(sx, ssize.width, borderType);
                        sy = borderInterpolate(sy, ssize.height, borderType);
                        D[dx] = S0[sy*sstep + sx];
                    }
                }
            }
        }
        else
        {
            for(int dx = 0; dx < dsize.width; dx++, D += cn )
            {
                int sx = XY[dx*2], sy = XY[dx*2+1];
                const T *S;
                if( (unsigned)sx < width1 && (unsigned)sy < height1 )
                {
                    // Unrolled copies for the common 3- and 4-channel cases.
                    if( cn == 3 )
                    {
                        S = S0 + sy*sstep + sx*3;
                        D[0] = S[0], D[1] = S[1], D[2] = S[2];
                    }
                    else if( cn == 4 )
                    {
                        S = S0 + sy*sstep + sx*4;
                        D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
                    }
                    else
                    {
                        S = S0 + sy*sstep + sx*cn;
                        for(int k = 0; k < cn; k++ )
                            D[k] = S[k];
                    }
                }
                else if( borderType != BORDER_TRANSPARENT )
                {
                    if( borderType == BORDER_REPLICATE )
                    {
                        sx = clip(sx, 0, ssize.width);
                        sy = clip(sy, 0, ssize.height);
                        S = S0 + sy*sstep + sx*cn;
                    }
                    else if( borderType == BORDER_CONSTANT )
                        S = &cval[0];
                    else
                    {
                        sx = borderInterpolate(sx, ssize.width, borderType);
                        sy = borderInterpolate(sy, ssize.height, borderType);
                        S = S0 + sy*sstep + sx*cn;
                    }
                    for(int k = 0; k < cn; k++ )
                        D[k] = S[k];
                }
            }
        }
    }
}
429
430
431
// Scalar fallback for the bilinear remap vector operation: processes zero
// pixels, so the caller's generic per-pixel loop handles everything.
struct RemapNoVec
{
    int operator()( const Mat&, void*, const short*, const ushort*,
                    const void*, int ) const { return 0; }
};
436
437
#if CV_SIMD128

// Vectorized bilinear remap for 8-bit images using OpenCV universal
// intrinsics. operator() interpolates up to `width` destination pixels from
// the integer coordinates in XY and the subpixel-weight indices in FXY, and
// returns how many pixels it actually produced (the caller finishes the
// remainder with scalar code). Returns 0 — i.e. declines the whole row —
// for unsupported channel counts, when SIMD is unavailable at runtime, or
// when the row stride does not fit the 16-bit offset packing used below.
struct RemapVec_8u
{
    int operator()( const Mat& _src, void* _dst, const short* XY,
                    const ushort* FXY, const void* _wtab, int width ) const
    {
        int cn = _src.channels(), x = 0, sstep = (int)_src.step;

        // sstep must fit in 15 bits: the (sx, sy) -> byte-offset conversion
        // below packs cn and sstep into 16-bit dot-product multipliers.
        if( (cn != 1 && cn != 3 && cn != 4) || !hasSIMD128() ||
            sstep > 0x8000 )
            return 0;

        const uchar *S0 = _src.ptr(), *S1 = _src.ptr(1);
        // Multi-channel paths use the 4x-replicated bilinear weight table.
        const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
        uchar* D = (uchar*)_dst;
        // Rounding constant added before the final right shift.
        v_int32x4 delta = v_setall_s32(INTER_REMAP_COEF_SCALE / 2);
        // Per-lane multipliers so that dotprod(XY, xy2ofs) = sx*cn + sy*sstep.
        v_int16x8 xy2ofs = v_reinterpret_as_s16(v_setall_s32(cn + (sstep << 16)));
        int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
        // Last address from which a full 8-byte vector load is still in-bounds.
        const uchar* src_limit_8bytes = _src.datalimit - v_int16x8::nlanes;
// Loads two adjacent RGB pixels at `ptr+offset` and interleaves them as
// r0r1 g0g1 b0b1 0 0 (16-bit lanes). Falls back to scalar byte loads when a
// vector load would read past the buffer end.
#define CV_PICK_AND_PACK_RGB(ptr, offset, result)  \
        {                                          \
            const uchar* const p = ((const uchar*)ptr) + (offset); \
            if (p <= src_limit_8bytes)             \
            {                                      \
                v_uint8x16 rrggbb, dummy;          \
                v_uint16x8 rrggbb8, dummy8;        \
                v_uint8x16 rgb0 = v_reinterpret_as_u8(v_int32x4(*(int*)(p), 0, 0, 0)); \
                v_uint8x16 rgb1 = v_reinterpret_as_u8(v_int32x4(*(int*)(p + 3), 0, 0, 0)); \
                v_zip(rgb0, rgb1, rrggbb, dummy);  \
                v_expand(rrggbb, rrggbb8, dummy8); \
                result = v_reinterpret_as_s16(rrggbb8); \
            }                                      \
            else                                   \
            {                                      \
                result = v_int16x8((short)p[0], (short)p[3], /* r0r1 */ \
                                   (short)p[1], (short)p[4], /* g0g1 */ \
                                   (short)p[2], (short)p[5], /* b0b1 */ 0, 0); \
            }                                      \
        }
// Same idea for two adjacent RGBA pixels -> r0r1 g0g1 b0b1 a0a1.
#define CV_PICK_AND_PACK_RGBA(ptr, offset, result) \
        {                                          \
            const uchar* const p = ((const uchar*)ptr) + (offset); \
            CV_DbgAssert(p <= src_limit_8bytes);   \
            v_uint8x16 rrggbbaa, dummy;            \
            v_uint16x8 rrggbbaa8, dummy8;          \
            v_uint8x16 rgba0 = v_reinterpret_as_u8(v_int32x4(*(int*)(p), 0, 0, 0)); \
            v_uint8x16 rgba1 = v_reinterpret_as_u8(v_int32x4(*(int*)(p + v_int32x4::nlanes), 0, 0, 0)); \
            v_zip(rgba0, rgba1, rrggbbaa, dummy);  \
            v_expand(rrggbbaa, rrggbbaa8, dummy8); \
            result = v_reinterpret_as_s16(rrggbbaa8); \
        }
// Gathers four horizontal 2-pixel pairs (one ushort each) at the four
// byte offsets into the low half of a vector.
#define CV_PICK_AND_PACK4(base,offset)  \
        v_uint16x8(*(ushort*)(base + offset[0]), *(ushort*)(base + offset[1]), \
                   *(ushort*)(base + offset[2]), *(ushort*)(base + offset[3]), \
                   0, 0, 0, 0)

        if( cn == 1 )
        {
            // 8 destination pixels per iteration.
            for( ; x <= width - 8; x += 8 )
            {
                v_int16x8 _xy0 = v_load(XY + x*2);
                v_int16x8 _xy1 = v_load(XY + x*2 + 8);
                v_int32x4 v0, v1, v2, v3, a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2;

                // Convert (sx, sy) pairs into byte offsets.
                v_int32x4 xy0 = v_dotprod( _xy0, xy2ofs );
                v_int32x4 xy1 = v_dotprod( _xy1, xy2ofs );
                v_store( iofs0, xy0 );
                v_store( iofs1, xy1 );

                // Top (S0) and bottom (S1) 2-pixel pairs for 4 pixels.
                v_uint16x8 stub, dummy;
                v_uint16x8 vec16;
                vec16 = CV_PICK_AND_PACK4(S0, iofs0);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v0 = v_reinterpret_as_s32(stub);
                vec16 = CV_PICK_AND_PACK4(S1, iofs0);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v1 = v_reinterpret_as_s32(stub);

                // Gather and interleave the per-pixel weight pairs, then
                // accumulate top and bottom rows via 16-bit dot products.
                v_zip(v_load_low((int*)(wtab + FXY[x] * 4)), v_load_low((int*)(wtab + FXY[x + 1] * 4)), a0, a1);
                v_zip(v_load_low((int*)(wtab + FXY[x + 2] * 4)), v_load_low((int*)(wtab + FXY[x + 3] * 4)), b0, b1);
                v_recombine(a0, b0, a2, b2);
                v1 = v_dotprod(v_reinterpret_as_s16(v1), v_reinterpret_as_s16(b2), delta);
                v0 = v_dotprod(v_reinterpret_as_s16(v0), v_reinterpret_as_s16(a2), v1);

                // Same for the next 4 pixels.
                vec16 = CV_PICK_AND_PACK4(S0, iofs1);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v2 = v_reinterpret_as_s32(stub);
                vec16 = CV_PICK_AND_PACK4(S1, iofs1);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v3 = v_reinterpret_as_s32(stub);

                v_zip(v_load_low((int*)(wtab + FXY[x + 4] * 4)), v_load_low((int*)(wtab + FXY[x + 5] * 4)), c0, c1);
                v_zip(v_load_low((int*)(wtab + FXY[x + 6] * 4)), v_load_low((int*)(wtab + FXY[x + 7] * 4)), d0, d1);
                v_recombine(c0, d0, c2, d2);
                v3 = v_dotprod(v_reinterpret_as_s16(v3), v_reinterpret_as_s16(d2), delta);
                v2 = v_dotprod(v_reinterpret_as_s16(v2), v_reinterpret_as_s16(c2), v3);

                // Descale the fixed-point sums and pack to 8 bits.
                v0 = v0 >> INTER_REMAP_COEF_BITS;
                v2 = v2 >> INTER_REMAP_COEF_BITS;
                v_pack_u_store(D + x, v_pack(v0, v2));
            }
        }
        else if( cn == 3 )
        {
            // 4 destination RGB pixels per iteration; "width - 5" keeps one
            // spare pixel because the 16-bit pack/rotate stores write past
            // the 3 bytes of the last pixel.
            for( ; x <= width - 5; x += 4, D += 12 )
            {
                v_int16x8 u0, v0, u1, v1;
                v_int16x8 _xy0 = v_load(XY + x * 2);

                v_int32x4 xy0 = v_dotprod(_xy0, xy2ofs);
                v_store(iofs0, xy0);

                int offset0 = FXY[x] * 16;
                int offset1 = FXY[x + 1] * 16;
                int offset2 = FXY[x + 2] * 16;
                int offset3 = FXY[x + 3] * 16;
                v_int16x8 w00 = v_load(wtab + offset0);
                v_int16x8 w01 = v_load(wtab + offset0 + 8);
                v_int16x8 w10 = v_load(wtab + offset1);
                v_int16x8 w11 = v_load(wtab + offset1 + 8);

                CV_PICK_AND_PACK_RGB(S0, iofs0[0], u0);
                CV_PICK_AND_PACK_RGB(S1, iofs0[0], v0);
                CV_PICK_AND_PACK_RGB(S0, iofs0[1], u1);
                CV_PICK_AND_PACK_RGB(S1, iofs0[1], v1);

                v_int32x4 result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                v_int32x4 result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;

                // Rotate so the two 3-byte pixels end up adjacent in the
                // packed result before the 6-byte store.
                result0 = v_rotate_left<1>(result0);
                v_int16x8 result8 = v_pack(result0, result1);
                v_uint8x16 result16 = v_pack_u(result8, result8);
                v_store_low(D, v_rotate_right<1>(result16));


                w00 = v_load(wtab + offset2);
                w01 = v_load(wtab + offset2 + 8);
                w10 = v_load(wtab + offset3);
                w11 = v_load(wtab + offset3 + 8);
                CV_PICK_AND_PACK_RGB(S0, iofs0[2], u0);
                CV_PICK_AND_PACK_RGB(S1, iofs0[2], v0);
                CV_PICK_AND_PACK_RGB(S0, iofs0[3], u1);
                CV_PICK_AND_PACK_RGB(S1, iofs0[3], v1);

                result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;

                result0 = v_rotate_left<1>(result0);
                result8 = v_pack(result0, result1);
                result16 = v_pack_u(result8, result8);
                v_store_low(D + 6, v_rotate_right<1>(result16));
            }
        }
        else if( cn == 4 )
        {
            // 4 destination RGBA pixels per iteration.
            for( ; x <= width - 4; x += 4, D += 16 )
            {
                v_int16x8 _xy0 = v_load(XY + x * 2);
                v_int16x8 u0, v0, u1, v1;

                v_int32x4 xy0 = v_dotprod( _xy0, xy2ofs );
                v_store(iofs0, xy0);
                int offset0 = FXY[x] * 16;
                int offset1 = FXY[x + 1] * 16;
                int offset2 = FXY[x + 2] * 16;
                int offset3 = FXY[x + 3] * 16;

                v_int16x8 w00 = v_load(wtab + offset0);
                v_int16x8 w01 = v_load(wtab + offset0 + 8);
                v_int16x8 w10 = v_load(wtab + offset1);
                v_int16x8 w11 = v_load(wtab + offset1 + 8);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[0], u0);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[0], v0);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[1], u1);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[1], v1);

                v_int32x4 result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                v_int32x4 result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;
                v_int16x8 result8 = v_pack(result0, result1);
                v_pack_u_store(D, result8);

                w00 = v_load(wtab + offset2);
                w01 = v_load(wtab + offset2 + 8);
                w10 = v_load(wtab + offset3);
                w11 = v_load(wtab + offset3 + 8);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[2], u0);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[2], v0);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[3], u1);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[3], v1);

                result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;
                result8 = v_pack(result0, result1);
                v_pack_u_store(D + 8, result8);
            }
        }

        return x;
    }
};

#else

// No SIMD available at compile time: fall back to the no-op functor.
typedef RemapNoVec RemapVec_8u;

#endif
644
645
646
// Bilinear remap. Each destination pixel is interpolated from the 2x2
// source neighborhood addressed by the integer map `_xy`, with weights
// selected from `_wtab` by the per-pixel index in `_fxy`. The row is
// scanned as alternating runs of "inlier" pixels (entire 2x2 patch inside
// the image — fast path, optionally vectorized through VecOp) and border
// pixels, which are handled per `borderType`.
//
// @tparam CastOp accumulator->pixel conversion (Cast or FixedPtCast)
// @tparam VecOp  SIMD kernel for the inlier fast path (or RemapNoVec)
// @tparam AT     weight element type (float or short)
template<class CastOp, class VecOp, typename AT>
static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
                           const Mat& _fxy, const void* _wtab,
                           int borderType, const Scalar& _borderValue )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;
    VecOp vecOp;

    // Border color; channels beyond 4 repeat the first four Scalar entries.
    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    // A pixel is an inlier when its whole 2x2 patch lies inside the image.
    unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
    CV_Assert( !ssize.empty() );
#if CV_SIMD128
    // The vectorized 8UC3 loader may read one extra pixel to the right,
    // so shrink the inlier region by one more column.
    if( _src.type() == CV_8UC3 )
        width1 = std::max(ssize.width-2, 0);
#endif

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);
        int X0 = 0;
        bool prevInlier = false;

        // dx runs one past the row end so the final run is flushed.
        for(int dx = 0; dx <= dsize.width; dx++ )
        {
            bool curInlier = dx < dsize.width ?
                (unsigned)XY[dx*2] < width1 &&
                (unsigned)XY[dx*2+1] < height1 : !prevInlier;
            if( curInlier == prevInlier )
                continue;

            // Run boundary: [dx, X1) is the run that just ended; note that
            // after the swap, `curInlier` describes the run being processed
            // inverted — hence the `!curInlier` test below for inliers.
            int X1 = dx;
            dx = X0;
            X0 = X1;
            prevInlier = curInlier;

            if( !curInlier )
            {
                // Inlier run: vectorized kernel first, scalar remainder after.
                int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
                D += len*cn;
                dx += len;

                if( cn == 1 )
                {
                    for( ; dx < X1; dx++, D++ )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx;
                        *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
                    }
                }
                else if( cn == 2 )
                    for( ; dx < X1; dx++, D += 2 )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*2;
                        WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
                        WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1);
                    }
                else if( cn == 3 )
                    for( ; dx < X1; dx++, D += 3 )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*3;
                        WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
                        WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
                        WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
                    }
                else if( cn == 4 )
                    for( ; dx < X1; dx++, D += 4 )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*4;
                        WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
                        WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1);
                        t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
                        t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
                        D[2] = castOp(t0); D[3] = castOp(t1);
                    }
                else
                    for( ; dx < X1; dx++, D += cn )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*cn;
                        for(int k = 0; k < cn; k++ )
                        {
                            WT t0 = S[k]*w[0] + S[k+cn]*w[1] + S[sstep+k]*w[2] + S[sstep+k+cn]*w[3];
                            D[k] = castOp(t0);
                        }
                    }
            }
            else
            {
                // Border run. For BORDER_TRANSPARENT the destination is left
                // as-is (except 8UC3, whose shrunken inlier region means
                // some "outliers" here are actually valid and must be done).
                if( borderType == BORDER_TRANSPARENT && cn != 3 )
                {
                    D += (X1 - dx)*cn;
                    dx = X1;
                    continue;
                }

                if( cn == 1 )
                    for( ; dx < X1; dx++, D++ )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        if( borderType == BORDER_CONSTANT &&
                            (sx >= ssize.width || sx+1 < 0 ||
                             sy >= ssize.height || sy+1 < 0) )
                        {
                            // Whole 2x2 patch is outside: pure border color.
                            D[0] = cval[0];
                        }
                        else
                        {
                            int sx0, sx1, sy0, sy1;
                            T v0, v1, v2, v3;
                            const AT* w = wtab + FXY[dx]*4;
                            if( borderType == BORDER_REPLICATE )
                            {
                                sx0 = clip(sx, 0, ssize.width);
                                sx1 = clip(sx+1, 0, ssize.width);
                                sy0 = clip(sy, 0, ssize.height);
                                sy1 = clip(sy+1, 0, ssize.height);
                                v0 = S0[sy0*sstep + sx0];
                                v1 = S0[sy0*sstep + sx1];
                                v2 = S0[sy1*sstep + sx0];
                                v3 = S0[sy1*sstep + sx1];
                            }
                            else
                            {
                                // borderInterpolate returns -1 for samples
                                // that should take the constant color.
                                sx0 = borderInterpolate(sx, ssize.width, borderType);
                                sx1 = borderInterpolate(sx+1, ssize.width, borderType);
                                sy0 = borderInterpolate(sy, ssize.height, borderType);
                                sy1 = borderInterpolate(sy+1, ssize.height, borderType);
                                v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
                                v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
                                v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
                                v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
                            }
                            D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
                        }
                    }
                else
                    for( ; dx < X1; dx++, D += cn )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        if( borderType == BORDER_CONSTANT &&
                            (sx >= ssize.width || sx+1 < 0 ||
                             sy >= ssize.height || sy+1 < 0) )
                        {
                            for(int k = 0; k < cn; k++ )
                                D[k] = cval[k];
                        }
                        else
                        {
                            int sx0, sx1, sy0, sy1;
                            const T *v0, *v1, *v2, *v3;
                            const AT* w = wtab + FXY[dx]*4;
                            if( borderType == BORDER_REPLICATE )
                            {
                                sx0 = clip(sx, 0, ssize.width);
                                sx1 = clip(sx+1, 0, ssize.width);
                                sy0 = clip(sy, 0, ssize.height);
                                sy1 = clip(sy+1, 0, ssize.height);
                                v0 = S0 + sy0*sstep + sx0*cn;
                                v1 = S0 + sy0*sstep + sx1*cn;
                                v2 = S0 + sy1*sstep + sx0*cn;
                                v3 = S0 + sy1*sstep + sx1*cn;
                            }
                            else if( borderType == BORDER_TRANSPARENT &&
                                ((unsigned)sx >= (unsigned)(ssize.width-1) ||
                                (unsigned)sy >= (unsigned)(ssize.height-1)))
                                continue;
                            else
                            {
                                sx0 = borderInterpolate(sx, ssize.width, borderType);
                                sx1 = borderInterpolate(sx+1, ssize.width, borderType);
                                sy0 = borderInterpolate(sy, ssize.height, borderType);
                                sy1 = borderInterpolate(sy+1, ssize.height, borderType);
                                v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
                                v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
                                v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
                                v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
                            }
                            for(int k = 0; k < cn; k++ )
                                D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
                        }
                    }
            }
        }
    }
}
855
856
857
// Bicubic remap. Each destination pixel is interpolated from the 4x4
// source neighborhood around the mapped position, with 16 weights taken
// from `_wtab` via the per-pixel index in `_fxy`.
//
// @tparam CastOp accumulator->pixel conversion (Cast or FixedPtCast)
// @tparam AT     weight element type (float or short)
// @tparam ONE    value representing weight 1 in AT's scale (1 for float,
//                INTER_REMAP_COEF_SCALE for fixed-point) — used to seed the
//                border-pixel accumulator with the constant color.
template<class CastOp, typename AT, int ONE>
static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
                          const Mat& _fxy, const void* _wtab,
                          int borderType, const Scalar& _borderValue )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;

    // Border color; channels beyond 4 repeat the first four Scalar entries.
    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    // Transparent pixels near (but not on) the border are interpolated with
    // reflected coordinates instead.
    int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;

    // Inlier test bounds for the top-left corner of the 4x4 patch.
    unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);

    // Process continuous data as one long row.
    if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);

        for(int dx = 0; dx < dsize.width; dx++, D += cn )
        {
            // (sx, sy) is the top-left of the 4x4 patch.
            int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
            const AT* w = wtab + FXY[dx]*16;
            if( (unsigned)sx < width1 && (unsigned)sy < height1 )
            {
                // Fast path: the whole 4x4 patch is inside the image.
                const T* S = S0 + sy*sstep + sx*cn;
                for(int k = 0; k < cn; k++ )
                {
                    WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
                    S += sstep;
                    sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
                    S += sstep;
                    sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
                    S += sstep;
                    sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
                    // Back to the first row, advanced one element for the
                    // next channel.
                    S += 1 - sstep*3;
                    D[k] = castOp(sum);
                }
            }
            else
            {
                int x[4], y[4];
                // Transparent + center outside the image: leave pixel as-is.
                if( borderType == BORDER_TRANSPARENT &&
                    ((unsigned)(sx+1) >= (unsigned)ssize.width ||
                    (unsigned)(sy+1) >= (unsigned)ssize.height) )
                    continue;

                // Constant border and patch fully outside: pure border color.
                if( borderType1 == BORDER_CONSTANT &&
                    (sx >= ssize.width || sx+4 <= 0 ||
                    sy >= ssize.height || sy+4 <= 0))
                {
                    for(int k = 0; k < cn; k++ )
                        D[k] = cval[k];
                    continue;
                }

                // Resolve each patch row/column through the border rule;
                // -1 (times cn) marks samples that take the constant color.
                for(int i = 0; i < 4; i++ )
                {
                    x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
                    y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
                }

                // Accumulate as cval + sum((S - cval)*w): samples with
                // negative coordinates implicitly contribute cval because
                // the weights sum to ONE.
                for(int k = 0; k < cn; k++, S0++, w -= 16 )
                {
                    WT cv = cval[k], sum = cv*ONE;
                    for(int i = 0; i < 4; i++, w += 4 )
                    {
                        int yi = y[i];
                        const T* S = S0 + yi*sstep;
                        if( yi < 0 )
                            continue;
                        if( x[0] >= 0 )
                            sum += (S[x[0]] - cv)*w[0];
                        if( x[1] >= 0 )
                            sum += (S[x[1]] - cv)*w[1];
                        if( x[2] >= 0 )
                            sum += (S[x[2]] - cv)*w[2];
                        if( x[3] >= 0 )
                            sum += (S[x[3]] - cv)*w[3];
                    }
                    D[k] = castOp(sum);
                }
                // Undo the per-channel S0 advance from the loop above.
                S0 -= cn;
            }
        }
    }
}
959
960
961
template<class CastOp, typename AT, int ONE>
962
static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
963
const Mat& _fxy, const void* _wtab,
964
int borderType, const Scalar& _borderValue )
965
{
966
typedef typename CastOp::rtype T;
967
typedef typename CastOp::type1 WT;
968
Size ssize = _src.size(), dsize = _dst.size();
969
const int cn = _src.channels();
970
const AT* wtab = (const AT*)_wtab;
971
const T* S0 = _src.ptr<T>();
972
size_t sstep = _src.step/sizeof(S0[0]);
973
T cval[CV_CN_MAX];
974
CastOp castOp;
975
976
for(int k = 0; k < cn; k++ )
977
cval[k] = saturate_cast<T>(_borderValue[k & 3]);
978
979
int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
980
981
unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
982
983
if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
984
{
985
dsize.width *= dsize.height;
986
dsize.height = 1;
987
}
988
989
for(int dy = 0; dy < dsize.height; dy++ )
990
{
991
T* D = _dst.ptr<T>(dy);
992
const short* XY = _xy.ptr<short>(dy);
993
const ushort* FXY = _fxy.ptr<ushort>(dy);
994
995
for(int dx = 0; dx < dsize.width; dx++, D += cn )
996
{
997
int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
998
const AT* w = wtab + FXY[dx]*64;
999
const T* S = S0 + sy*sstep + sx*cn;
1000
if( (unsigned)sx < width1 && (unsigned)sy < height1 )
1001
{
1002
for(int k = 0; k < cn; k++ )
1003
{
1004
WT sum = 0;
1005
for( int r = 0; r < 8; r++, S += sstep, w += 8 )
1006
sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
1007
S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
1008
w -= 64;
1009
S -= sstep*8 - 1;
1010
D[k] = castOp(sum);
1011
}
1012
}
1013
else
1014
{
1015
int x[8], y[8];
1016
if( borderType == BORDER_TRANSPARENT &&
1017
((unsigned)(sx+3) >= (unsigned)ssize.width ||
1018
(unsigned)(sy+3) >= (unsigned)ssize.height) )
1019
continue;
1020
1021
if( borderType1 == BORDER_CONSTANT &&
1022
(sx >= ssize.width || sx+8 <= 0 ||
1023
sy >= ssize.height || sy+8 <= 0))
1024
{
1025
for(int k = 0; k < cn; k++ )
1026
D[k] = cval[k];
1027
continue;
1028
}
1029
1030
for(int i = 0; i < 8; i++ )
1031
{
1032
x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
1033
y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
1034
}
1035
1036
for(int k = 0; k < cn; k++, S0++, w -= 64 )
1037
{
1038
WT cv = cval[k], sum = cv*ONE;
1039
for(int i = 0; i < 8; i++, w += 8 )
1040
{
1041
int yi = y[i];
1042
const T* S1 = S0 + yi*sstep;
1043
if( yi < 0 )
1044
continue;
1045
if( x[0] >= 0 )
1046
sum += (S1[x[0]] - cv)*w[0];
1047
if( x[1] >= 0 )
1048
sum += (S1[x[1]] - cv)*w[1];
1049
if( x[2] >= 0 )
1050
sum += (S1[x[2]] - cv)*w[2];
1051
if( x[3] >= 0 )
1052
sum += (S1[x[3]] - cv)*w[3];
1053
if( x[4] >= 0 )
1054
sum += (S1[x[4]] - cv)*w[4];
1055
if( x[5] >= 0 )
1056
sum += (S1[x[5]] - cv)*w[5];
1057
if( x[6] >= 0 )
1058
sum += (S1[x[6]] - cv)*w[6];
1059
if( x[7] >= 0 )
1060
sum += (S1[x[7]] - cv)*w[7];
1061
}
1062
D[k] = castOp(sum);
1063
}
1064
S0 -= cn;
1065
}
1066
}
1067
}
1068
}
1069
1070
1071
typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
1072
int borderType, const Scalar& _borderValue );
1073
1074
typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
1075
const Mat& _fxy, const void* _wtab,
1076
int borderType, const Scalar& _borderValue);
1077
1078
class RemapInvoker :
1079
public ParallelLoopBody
1080
{
1081
public:
1082
RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
1083
const Mat *_m2, int _borderType, const Scalar &_borderValue,
1084
int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
1085
ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
1086
borderType(_borderType), borderValue(_borderValue),
1087
planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
1088
{
1089
}
1090
1091
virtual void operator() (const Range& range) const CV_OVERRIDE
1092
{
1093
int x, y, x1, y1;
1094
const int buf_size = 1 << 14;
1095
int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
1096
int bcols0 = std::min(buf_size/brows0, dst->cols);
1097
brows0 = std::min(buf_size/bcols0, dst->rows);
1098
#if CV_SIMD128
1099
bool useSIMD = hasSIMD128();
1100
#endif
1101
1102
Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
1103
if( !nnfunc )
1104
_bufa.create(brows0, bcols0, CV_16UC1);
1105
1106
for( y = range.start; y < range.end; y += brows0 )
1107
{
1108
for( x = 0; x < dst->cols; x += bcols0 )
1109
{
1110
int brows = std::min(brows0, range.end - y);
1111
int bcols = std::min(bcols0, dst->cols - x);
1112
Mat dpart(*dst, Rect(x, y, bcols, brows));
1113
Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
1114
1115
if( nnfunc )
1116
{
1117
if( m1->type() == CV_16SC2 && m2->empty() ) // the data is already in the right format
1118
bufxy = (*m1)(Rect(x, y, bcols, brows));
1119
else if( map_depth != CV_32F )
1120
{
1121
for( y1 = 0; y1 < brows; y1++ )
1122
{
1123
short* XY = bufxy.ptr<short>(y1);
1124
const short* sXY = m1->ptr<short>(y+y1) + x*2;
1125
const ushort* sA = m2->ptr<ushort>(y+y1) + x;
1126
1127
for( x1 = 0; x1 < bcols; x1++ )
1128
{
1129
int a = sA[x1] & (INTER_TAB_SIZE2-1);
1130
XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
1131
XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
1132
}
1133
}
1134
}
1135
else if( !planar_input )
1136
(*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
1137
else
1138
{
1139
for( y1 = 0; y1 < brows; y1++ )
1140
{
1141
short* XY = bufxy.ptr<short>(y1);
1142
const float* sX = m1->ptr<float>(y+y1) + x;
1143
const float* sY = m2->ptr<float>(y+y1) + x;
1144
x1 = 0;
1145
1146
#if CV_SIMD128
1147
if( useSIMD )
1148
{
1149
int span = v_float32x4::nlanes;
1150
for( ; x1 <= bcols - span * 2; x1 += span * 2 )
1151
{
1152
v_int32x4 ix0 = v_round(v_load(sX + x1));
1153
v_int32x4 iy0 = v_round(v_load(sY + x1));
1154
v_int32x4 ix1 = v_round(v_load(sX + x1 + span));
1155
v_int32x4 iy1 = v_round(v_load(sY + x1 + span));
1156
1157
v_int16x8 dx, dy;
1158
dx = v_pack(ix0, ix1);
1159
dy = v_pack(iy0, iy1);
1160
v_store_interleave(XY + x1 * 2, dx, dy);
1161
}
1162
}
1163
#endif
1164
for( ; x1 < bcols; x1++ )
1165
{
1166
XY[x1*2] = saturate_cast<short>(sX[x1]);
1167
XY[x1*2+1] = saturate_cast<short>(sY[x1]);
1168
}
1169
}
1170
}
1171
nnfunc( *src, dpart, bufxy, borderType, borderValue );
1172
continue;
1173
}
1174
1175
Mat bufa(_bufa, Rect(0, 0, bcols, brows));
1176
for( y1 = 0; y1 < brows; y1++ )
1177
{
1178
short* XY = bufxy.ptr<short>(y1);
1179
ushort* A = bufa.ptr<ushort>(y1);
1180
1181
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
1182
{
1183
bufxy = (*m1)(Rect(x, y, bcols, brows));
1184
1185
const ushort* sA = m2->ptr<ushort>(y+y1) + x;
1186
x1 = 0;
1187
1188
#if CV_SIMD128
1189
if (useSIMD)
1190
{
1191
v_uint16x8 v_scale = v_setall_u16(INTER_TAB_SIZE2 - 1);
1192
int span = v_uint16x8::nlanes;
1193
for( ; x1 <= bcols - span; x1 += span )
1194
v_store((unsigned short*)(A + x1), v_load(sA + x1) & v_scale);
1195
}
1196
#endif
1197
for( ; x1 < bcols; x1++ )
1198
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
1199
}
1200
else if( planar_input )
1201
{
1202
const float* sX = m1->ptr<float>(y+y1) + x;
1203
const float* sY = m2->ptr<float>(y+y1) + x;
1204
1205
x1 = 0;
1206
#if CV_SIMD128
1207
if( useSIMD )
1208
{
1209
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
1210
v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1);
1211
int span = v_float32x4::nlanes;
1212
for( ; x1 <= bcols - span * 2; x1 += span * 2 )
1213
{
1214
v_int32x4 v_sx0 = v_round(v_scale * v_load(sX + x1));
1215
v_int32x4 v_sy0 = v_round(v_scale * v_load(sY + x1));
1216
v_int32x4 v_sx1 = v_round(v_scale * v_load(sX + x1 + span));
1217
v_int32x4 v_sy1 = v_round(v_scale * v_load(sY + x1 + span));
1218
v_uint16x8 v_sx8 = v_reinterpret_as_u16(v_pack(v_sx0 & v_scale2, v_sx1 & v_scale2));
1219
v_uint16x8 v_sy8 = v_reinterpret_as_u16(v_pack(v_sy0 & v_scale2, v_sy1 & v_scale2));
1220
v_uint16x8 v_v = v_shl<INTER_BITS>(v_sy8) | (v_sx8);
1221
v_store(A + x1, v_v);
1222
1223
v_int16x8 v_d0 = v_pack(v_shr<INTER_BITS>(v_sx0), v_shr<INTER_BITS>(v_sx1));
1224
v_int16x8 v_d1 = v_pack(v_shr<INTER_BITS>(v_sy0), v_shr<INTER_BITS>(v_sy1));
1225
v_store_interleave(XY + (x1 << 1), v_d0, v_d1);
1226
}
1227
}
1228
#endif
1229
for( ; x1 < bcols; x1++ )
1230
{
1231
int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
1232
int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
1233
int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
1234
XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
1235
XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
1236
A[x1] = (ushort)v;
1237
}
1238
}
1239
else
1240
{
1241
const float* sXY = m1->ptr<float>(y+y1) + x*2;
1242
x1 = 0;
1243
1244
#if CV_SIMD128
1245
if( useSIMD )
1246
{
1247
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
1248
v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1), v_scale3 = v_setall_s32(INTER_TAB_SIZE);
1249
int span = v_float32x4::nlanes;
1250
for( ; x1 <= bcols - span * 2; x1 += span * 2 )
1251
{
1252
v_float32x4 v_fx, v_fy;
1253
v_load_deinterleave(sXY + (x1 << 1), v_fx, v_fy);
1254
v_int32x4 v_sx0 = v_round(v_fx * v_scale);
1255
v_int32x4 v_sy0 = v_round(v_fy * v_scale);
1256
v_load_deinterleave(sXY + ((x1 + span) << 1), v_fx, v_fy);
1257
v_int32x4 v_sx1 = v_round(v_fx * v_scale);
1258
v_int32x4 v_sy1 = v_round(v_fy * v_scale);
1259
v_int32x4 v_v0 = v_muladd(v_scale3, (v_sy0 & v_scale2), (v_sx0 & v_scale2));
1260
v_int32x4 v_v1 = v_muladd(v_scale3, (v_sy1 & v_scale2), (v_sx1 & v_scale2));
1261
v_uint16x8 v_v8 = v_reinterpret_as_u16(v_pack(v_v0, v_v1));
1262
v_store(A + x1, v_v8);
1263
v_int16x8 v_dx = v_pack(v_shr<INTER_BITS>(v_sx0), v_shr<INTER_BITS>(v_sx1));
1264
v_int16x8 v_dy = v_pack(v_shr<INTER_BITS>(v_sy0), v_shr<INTER_BITS>(v_sy1));
1265
v_store_interleave(XY + (x1 << 1), v_dx, v_dy);
1266
}
1267
}
1268
#endif
1269
1270
for( ; x1 < bcols; x1++ )
1271
{
1272
int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
1273
int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
1274
int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
1275
XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
1276
XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
1277
A[x1] = (ushort)v;
1278
}
1279
}
1280
}
1281
ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
1282
}
1283
}
1284
}
1285
1286
private:
1287
const Mat* src;
1288
Mat* dst;
1289
const Mat *m1, *m2;
1290
int borderType;
1291
Scalar borderValue;
1292
int planar_input;
1293
RemapNNFunc nnfunc;
1294
RemapFunc ifunc;
1295
const void *ctab;
1296
};
1297
1298
#ifdef HAVE_OPENCL

// OpenCL implementation of remap. Supports INTER_NEAREST / INTER_LINEAR with
// all border modes except BORDER_TRANSPARENT; returns false to fall back to
// the CPU path for anything else.
static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, InputArray _map2,
                      int interpolation, int borderType, const Scalar& borderValue)
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int cn = _src.channels(), type = _src.type(), depth = _src.depth(),
        rowsPerWI = dev.isIntel() ? 4 : 1;

    if (borderType == BORDER_TRANSPARENT || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
        || _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
        return false;

    UMat src = _src.getUMat(), map1 = _map1.getUMat(), map2 = _map2.getUMat();

    // Normalize argument order so the packed CV_16SC2 map, if any, is map1.
    if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.empty())) ||
        (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.empty())) )
    {
        if (map1.type() != CV_16SC2)
            std::swap(map1, map2);
    }
    else
        CV_Assert( map1.type() == CV_32FC2 || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );

    _dst.create(map1.size(), type);
    UMat dst = _dst.getUMat();

    // Kernel name encodes the map-type combination compiled into the program.
    String kernelName = "remap";
    if (map1.type() == CV_32FC2 && map2.empty())
        kernelName += "_32FC2";
    else if (map1.type() == CV_16SC2)
    {
        kernelName += "_16SC2";
        if (!map2.empty())
            kernelName += "_16UC1";
    }
    else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
        kernelName += "_2_32FC1";
    else
        CV_Error(Error::StsBadArg, "Unsupported map types");

    static const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
    static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                           "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
    String buildOptions = format("-D %s -D %s -D T=%s -D rowsPerWI=%d",
                                 interMap[interpolation], borderMap[borderType],
                                 ocl::typeToStr(type), rowsPerWI);

    if (interpolation != INTER_NEAREST)
    {
        char cvt[3][40];
        // Interpolation accumulates in at least CV_32F precision.
        int wdepth = std::max(CV_32F, depth);
        buildOptions = buildOptions
                      + format(" -D WT=%s -D convertToT=%s -D convertToWT=%s"
                               " -D convertToWT2=%s -D WT2=%s",
                               ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                               ocl::convertTypeStr(wdepth, depth, cn, cvt[0]),
                               ocl::convertTypeStr(depth, wdepth, cn, cvt[1]),
                               ocl::convertTypeStr(CV_32S, wdepth, 2, cvt[2]),
                               ocl::typeToStr(CV_MAKE_TYPE(wdepth, 2)));
    }
    // 3-channel border scalars are padded to 4 channels for alignment.
    int scalarcn = cn == 3 ? 4 : cn;
    int sctype = CV_MAKETYPE(depth, scalarcn);
    buildOptions += format(" -D T=%s -D T1=%s -D cn=%d -D ST=%s -D depth=%d",
                           ocl::typeToStr(type), ocl::typeToStr(depth),
                           cn, ocl::typeToStr(sctype), depth);

    ocl::Kernel k(kernelName.c_str(), ocl::imgproc::remap_oclsrc, buildOptions);

    Mat scalar(1, 1, sctype, borderValue);
    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst),
            map1arg = ocl::KernelArg::ReadOnlyNoSize(map1),
            scalararg = ocl::KernelArg::Constant((void*)scalar.ptr(), scalar.elemSize());

    if (map2.empty())
        k.args(srcarg, dstarg, map1arg, scalararg);
    else
        k.args(srcarg, dstarg, map1arg, ocl::KernelArg::ReadOnlyNoSize(map2), scalararg);

    size_t globalThreads[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalThreads, NULL, false);
}
#if 0
/**
@deprecated with old version of cv::linearPolar
*/
// OpenCL implementation of the legacy linearPolar transform: builds the
// polar<->cartesian coordinate maps on the device, then delegates to remap().
static bool ocl_linearPolar(InputArray _src, OutputArray _dst,
    Point2f center, double maxRadius, int flags)
{
    UMat src_with_border; // don't scope this variable (it holds image data)

    UMat mapx, mapy, r, cp_sp;
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    Size dsize = src.size();
    r.create(Size(1, dsize.width), CV_32F);
    cp_sp.create(Size(1, dsize.height), CV_32FC2);

    mapx.create(dsize, CV_32F);
    mapy.create(dsize, CV_32F);
    size_t w = dsize.width;
    size_t h = dsize.height;
    String buildOptions;
    unsigned mem_size = 32;   // work-group/local-memory tile edge
    if (flags & CV_WARP_INVERSE_MAP)
    {
        buildOptions = "-D InverseMap";
    }
    else
    {
        buildOptions = format("-D ForwardMap  -D MEM_SIZE=%d", mem_size);
    }
    String retval;
    ocl::Program p(ocl::imgproc::linearPolar_oclsrc, buildOptions, retval);
    ocl::Kernel k("linearPolar", p);
    ocl::KernelArg ocl_mapx = ocl::KernelArg::PtrReadWrite(mapx), ocl_mapy = ocl::KernelArg::PtrReadWrite(mapy);
    ocl::KernelArg  ocl_cp_sp = ocl::KernelArg::PtrReadWrite(cp_sp);
    ocl::KernelArg ocl_r = ocl::KernelArg::PtrReadWrite(r);

    if (!(flags & CV_WARP_INVERSE_MAP))
    {
        // Precompute per-row angles (cos/sin) and per-column radii, then
        // expand them into the full forward map.
        ocl::Kernel computeAngleRadius_Kernel("computeAngleRadius", p);
        float PI2_height = (float) CV_2PI / dsize.height;
        float maxRadius_width = (float) maxRadius / dsize.width;
        computeAngleRadius_Kernel.args(ocl_cp_sp, ocl_r, maxRadius_width, PI2_height, (unsigned)dsize.width, (unsigned)dsize.height);
        size_t max_dim = max(h, w);
        computeAngleRadius_Kernel.run(1, &max_dim, NULL, false);
        k.args(ocl_mapx, ocl_mapy, ocl_cp_sp, ocl_r, center.x, center.y, (unsigned)dsize.width, (unsigned)dsize.height);
    }
    else
    {
        // Wrap one row of border on top/bottom so angles interpolate across
        // the 0/2*pi seam.
        const int ANGLE_BORDER = 1;

        cv::copyMakeBorder(src, src_with_border, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
        src = src_with_border;
        Size ssize = src_with_border.size();
        ssize.height -= 2 * ANGLE_BORDER;
        float ascale = ssize.height / ((float)CV_2PI);
        float pscale = ssize.width / ((float) maxRadius);

        k.args(ocl_mapx, ocl_mapy, ascale, pscale, center.x, center.y, ANGLE_BORDER, (unsigned)dsize.width, (unsigned)dsize.height);
    }
    size_t globalThreads[2] = { (size_t)dsize.width , (size_t)dsize.height };
    size_t localThreads[2] = { mem_size , mem_size };
    k.run(2, globalThreads, localThreads, false);
    remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    return true;
}
static bool ocl_logPolar(InputArray _src, OutputArray _dst,
1453
Point2f center, double M, int flags)
1454
{
1455
if (M <= 0)
1456
CV_Error(CV_StsOutOfRange, "M should be >0");
1457
UMat src_with_border; // don't scope this variable (it holds image data)
1458
1459
UMat mapx, mapy, r, cp_sp;
1460
UMat src = _src.getUMat();
1461
_dst.create(src.size(), src.type());
1462
Size dsize = src.size();
1463
r.create(Size(1, dsize.width), CV_32F);
1464
cp_sp.create(Size(1, dsize.height), CV_32FC2);
1465
1466
mapx.create(dsize, CV_32F);
1467
mapy.create(dsize, CV_32F);
1468
size_t w = dsize.width;
1469
size_t h = dsize.height;
1470
String buildOptions;
1471
unsigned mem_size = 32;
1472
if (flags & CV_WARP_INVERSE_MAP)
1473
{
1474
buildOptions = "-D InverseMap";
1475
}
1476
else
1477
{
1478
buildOptions = format("-D ForwardMap -D MEM_SIZE=%d", mem_size);
1479
}
1480
String retval;
1481
ocl::Program p(ocl::imgproc::logPolar_oclsrc, buildOptions, retval);
1482
//ocl::Program p(ocl::imgproc::my_linearPolar_oclsrc, buildOptions, retval);
1483
//printf("%s\n", retval);
1484
ocl::Kernel k("logPolar", p);
1485
ocl::KernelArg ocl_mapx = ocl::KernelArg::PtrReadWrite(mapx), ocl_mapy = ocl::KernelArg::PtrReadWrite(mapy);
1486
ocl::KernelArg ocl_cp_sp = ocl::KernelArg::PtrReadWrite(cp_sp);
1487
ocl::KernelArg ocl_r = ocl::KernelArg::PtrReadWrite(r);
1488
1489
if (!(flags & CV_WARP_INVERSE_MAP))
1490
{
1491
1492
1493
1494
ocl::Kernel computeAngleRadius_Kernel("computeAngleRadius", p);
1495
float PI2_height = (float) CV_2PI / dsize.height;
1496
1497
computeAngleRadius_Kernel.args(ocl_cp_sp, ocl_r, (float)M, PI2_height, (unsigned)dsize.width, (unsigned)dsize.height);
1498
size_t max_dim = max(h, w);
1499
computeAngleRadius_Kernel.run(1, &max_dim, NULL, false);
1500
k.args(ocl_mapx, ocl_mapy, ocl_cp_sp, ocl_r, center.x, center.y, (unsigned)dsize.width, (unsigned)dsize.height);
1501
}
1502
else
1503
{
1504
const int ANGLE_BORDER = 1;
1505
1506
cv::copyMakeBorder(src, src_with_border, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
1507
src = src_with_border;
1508
Size ssize = src_with_border.size();
1509
ssize.height -= 2 * ANGLE_BORDER;
1510
float ascale = ssize.height / ((float)CV_2PI);
1511
1512
1513
k.args(ocl_mapx, ocl_mapy, ascale, (float)M, center.x, center.y, ANGLE_BORDER, (unsigned)dsize.width, (unsigned)dsize.height);
1514
1515
1516
}
1517
size_t globalThreads[2] = { (size_t)dsize.width , (size_t)dsize.height };
1518
size_t localThreads[2] = { mem_size , mem_size };
1519
k.run(2, globalThreads, localThreads, false);
1520
remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
1521
return true;
1522
}
1523
#endif
1524
1525
#endif
1526
1527
#ifdef HAVE_OPENVX
// OpenVX-accelerated remap for CV_8UC1 images with bilinear interpolation.
// Returns false (without touching dst) when the interpolation mode is not
// supported, so the caller can fall back to the default implementation.
static bool openvx_remap(Mat src, Mat dst, Mat map1, Mat map2, int interpolation, const Scalar& borderValue)
{
    vx_interpolation_type_e inter_type;
    switch (interpolation)
    {
    case INTER_LINEAR:
#if VX_VERSION > VX_VERSION_1_0
        inter_type = VX_INTERPOLATION_BILINEAR;
#else
        inter_type = VX_INTERPOLATION_TYPE_BILINEAR;
#endif
        break;
    case INTER_NEAREST:
/* NEAREST_NEIGHBOR mode disabled since OpenCV round half to even while OpenVX sample implementation round half up
#if VX_VERSION > VX_VERSION_1_0
        inter_type = VX_INTERPOLATION_NEAREST_NEIGHBOR;
#else
        inter_type = VX_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
#endif
        if (!map1.empty())
            for (int y = 0; y < map1.rows; ++y)
            {
                float* line = map1.ptr<float>(y);
                for (int x = 0; x < map1.cols; ++x)
                    line[x] = cvRound(line[x]);
            }
        if (!map2.empty())
            for (int y = 0; y < map2.rows; ++y)
            {
                float* line = map2.ptr<float>(y);
                for (int x = 0; x < map2.cols; ++x)
                    line[x] = cvRound(line[x]);
            }
        break;
*/
    case INTER_AREA://AREA interpolation mode is unsupported
    default:
        return false;
    }

    try
    {
        ivx::Context ctx = ovx::getOpenVXContext();

        // OpenVX remap cannot run in place; copy the source if src == dst.
        Mat a;
        if (dst.data != src.data)
            a = src;
        else
            src.copyTo(a);

        ivx::Image
            ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
                ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data),
            ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
                ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data);

        //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
        //since OpenVX standard says nothing about thread-safety for now
        ivx::border_t prevBorder = ctx.immediateBorder();
        ctx.setImmediateBorder(VX_BORDER_CONSTANT, (vx_uint8)(borderValue[0]));

        ivx::Remap map = ivx::Remap::create(ctx, src.cols, src.rows, dst.cols, dst.rows);
        if (map1.empty()) map.setMappings(map2);
        else if (map2.empty()) map.setMappings(map1);
        else map.setMappings(map1, map2);
        ivx::IVX_CHECK_STATUS(vxuRemap(ctx, ia, map, inter_type, ib));
#ifdef VX_VERSION_1_1
        ib.swapHandle();
        ia.swapHandle();
#endif

        ctx.setImmediateBorder(prevBorder);
    }
    catch (ivx::RuntimeError & e)
    {
        CV_Error(CV_StsInternal, e.what());
        return false;
    }
    catch (ivx::WrapperError & e)
    {
        CV_Error(CV_StsInternal, e.what());
        return false;
    }
    return true;
}
#endif
#if defined HAVE_IPP && !IPP_DISABLE_REMAP

// Signature of the Intel IPP remap primitives (ippiRemap_8u_C1R etc.),
// selected by image type in cv::remap().
typedef IppStatus (CV_STDCALL * ippiRemap)(const void * pSrc, IppiSize srcSize, int srcStep, IppiRect srcRoi,
                                           const Ipp32f* pxMap, int xMapStep, const Ipp32f* pyMap, int yMapStep,
                                           void * pDst, int dstStep, IppiSize dstRoiSize, int interpolation);

// Parallel wrapper around an IPP remap primitive: each worker remaps a band
// of destination rows. *ok is cleared on any IPP failure so the caller can
// fall back to the generic implementation.
class IPPRemapInvoker :
        public ParallelLoopBody
{
public:
    IPPRemapInvoker(Mat & _src, Mat & _dst, Mat & _xmap, Mat & _ymap, ippiRemap _ippFunc,
                    int _ippInterpolation, int _borderType, const Scalar & _borderValue, bool * _ok) :
        ParallelLoopBody(), src(_src), dst(_dst), map1(_xmap), map2(_ymap), ippFunc(_ippFunc),
        ippInterpolation(_ippInterpolation), borderType(_borderType), borderValue(_borderValue), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator() (const Range & range) const
    {
        IppiRect srcRoiRect = { 0, 0, src.cols, src.rows };
        Mat dstRoi = dst.rowRange(range);
        IppiSize dstRoiSize = ippiSize(dstRoi.size());
        int type = dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

        // IPP remap leaves out-of-range pixels untouched, so pre-fill the
        // destination band with the border value for BORDER_CONSTANT.
        if (borderType == BORDER_CONSTANT &&
                !IPPSet(borderValue, dstRoi.ptr(), (int)dstRoi.step, dstRoiSize, cn, depth))
        {
            *ok = false;
            return;
        }

        if (CV_INSTRUMENT_FUN_IPP(ippFunc, src.ptr(), ippiSize(src.size()), (int)src.step, srcRoiRect,
                    map1.ptr<Ipp32f>(), (int)map1.step, map2.ptr<Ipp32f>(), (int)map2.step,
                    dstRoi.ptr(), (int)dstRoi.step, dstRoiSize, ippInterpolation) < 0)
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }

private:
    Mat & src, & dst, & map1, & map2;
    ippiRemap ippFunc;
    int ippInterpolation, borderType;
    Scalar borderValue;
    bool * ok;
};

#endif
}
1668
1669
// Public entry point: generic geometric transform of src via per-pixel
// coordinate maps. Tries OpenCL, OpenVX and IPP accelerated paths first,
// then dispatches to the scalar/SIMD kernels by interpolation mode and
// source depth through RemapInvoker.
void cv::remap( InputArray _src, OutputArray _dst,
                InputArray _map1, InputArray _map2,
                int interpolation, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    // Kernel tables indexed by source depth (CV_8U..CV_64F); 0 entries mark
    // unsupported depths.
    static RemapNNFunc nn_tab[] =
    {
        remapNearest<uchar>, remapNearest<schar>, remapNearest<ushort>, remapNearest<short>,
        remapNearest<int>, remapNearest<float>, remapNearest<double>, 0
    };

    static RemapFunc linear_tab[] =
    {
        remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
        remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
        remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
        remapBilinear<Cast<float, float>, RemapNoVec, float>,
        remapBilinear<Cast<double, double>, RemapNoVec, float>, 0
    };

    static RemapFunc cubic_tab[] =
    {
        remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
        remapBicubic<Cast<float, ushort>, float, 1>,
        remapBicubic<Cast<float, short>, float, 1>, 0,
        remapBicubic<Cast<float, float>, float, 1>,
        remapBicubic<Cast<double, double>, float, 1>, 0
    };

    static RemapFunc lanczos4_tab[] =
    {
        remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
        remapLanczos4<Cast<float, ushort>, float, 1>,
        remapLanczos4<Cast<float, short>, float, 1>, 0,
        remapLanczos4<Cast<float, float>, float, 1>,
        remapLanczos4<Cast<double, double>, float, 1>, 0
    };

    CV_Assert( !_map1.empty() );
    CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_remap(_src, _dst, _map1, _map2, interpolation, borderType, borderValue))

    Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
    _dst.create( map1.size(), src.type() );
    Mat dst = _dst.getMat();


    CV_OVX_RUN(
        src.type() == CV_8UC1 && dst.type() == CV_8UC1 &&
        !ovx::skipSmallImages<VX_KERNEL_REMAP>(src.cols, src.rows) &&
        (borderType& ~BORDER_ISOLATED) == BORDER_CONSTANT &&
        ((map1.type() == CV_32FC2 && map2.empty() && map1.size == dst.size) ||
         (map1.type() == CV_32FC1 && map2.type() == CV_32FC1 && map1.size == dst.size && map2.size == dst.size) ||
         (map1.empty() && map2.type() == CV_32FC2 && map2.size == dst.size)) &&
        ((borderType & BORDER_ISOLATED) != 0 || !src.isSubmatrix()),
        openvx_remap(src, dst, map1, map2, interpolation, borderValue));

    // Internal coordinate representation is 16-bit signed.
    CV_Assert( dst.cols < SHRT_MAX && dst.rows < SHRT_MAX && src.cols < SHRT_MAX && src.rows < SHRT_MAX );

    // remap cannot run in place.
    if( dst.data == src.data )
        src = src.clone();

    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    int type = src.type(), depth = CV_MAT_DEPTH(type);

#if defined HAVE_IPP && !IPP_DISABLE_REMAP
    CV_IPP_CHECK()
    {
        if ((interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_NEAREST) &&
                map1.type() == CV_32FC1 && map2.type() == CV_32FC1 &&
                (borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT))
        {
            int ippInterpolation =
                interpolation == INTER_NEAREST ? IPPI_INTER_NN :
                interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : IPPI_INTER_CUBIC;

            ippiRemap ippFunc =
                type == CV_8UC1 ? (ippiRemap)ippiRemap_8u_C1R :
                type == CV_8UC3 ? (ippiRemap)ippiRemap_8u_C3R :
                type == CV_8UC4 ? (ippiRemap)ippiRemap_8u_C4R :
                type == CV_16UC1 ? (ippiRemap)ippiRemap_16u_C1R :
                type == CV_16UC3 ? (ippiRemap)ippiRemap_16u_C3R :
                type == CV_16UC4 ? (ippiRemap)ippiRemap_16u_C4R :
                type == CV_32FC1 ? (ippiRemap)ippiRemap_32f_C1R :
                type == CV_32FC3 ? (ippiRemap)ippiRemap_32f_C3R :
                type == CV_32FC4 ? (ippiRemap)ippiRemap_32f_C4R : 0;

            if (ippFunc)
            {
                bool ok;
                IPPRemapInvoker invoker(src, dst, map1, map2, ippFunc, ippInterpolation,
                                        borderType, borderValue, &ok);
                Range range(0, dst.rows);
                parallel_for_(range, invoker, dst.total() / (double)(1 << 16));

                if (ok)
                {
                    CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                    return;
                }
                setIppErrorStatus();
            }
        }
    }
#endif

    RemapNNFunc nnfunc = 0;
    RemapFunc ifunc = 0;
    const void* ctab = 0;
    bool fixpt = depth == CV_8U;   // 8-bit path uses fixed-point weights
    bool planar_input = false;

    if( interpolation == INTER_NEAREST )
    {
        nnfunc = nn_tab[depth];
        CV_Assert( nnfunc != 0 );
    }
    else
    {
        if( interpolation == INTER_LINEAR )
            ifunc = linear_tab[depth];
        else if( interpolation == INTER_CUBIC ){
            ifunc = cubic_tab[depth];
            CV_Assert( _src.channels() <= 4 );
        }
        else if( interpolation == INTER_LANCZOS4 ){
            ifunc = lanczos4_tab[depth];
            CV_Assert( _src.channels() <= 4 );
        }
        else
            CV_Error( CV_StsBadArg, "Unknown interpolation method" );
        CV_Assert( ifunc != 0 );
        ctab = initInterTab2D( interpolation, fixpt );
    }

    // Normalize map order so that a packed CV_16SC2 coordinate map, if
    // present, always arrives as m1.
    const Mat *m1 = &map1, *m2 = &map2;

    if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || map2.empty())) ||
        (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || map1.empty())) )
    {
        if( map1.type() != CV_16SC2 )
            std::swap(m1, m2);
    }
    else
    {
        CV_Assert( ((map1.type() == CV_32FC2 || map1.type() == CV_16SC2) && map2.empty()) ||
            (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
        planar_input = map1.channels() == 1;
    }

    RemapInvoker invoker(src, dst, m1, m2,
                         borderType, borderValue, planar_input, nnfunc, ifunc,
                         ctab);
    parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
}
void cv::convertMaps( InputArray _map1, InputArray _map2,
1832
OutputArray _dstmap1, OutputArray _dstmap2,
1833
int dstm1type, bool nninterpolate )
1834
{
1835
CV_INSTRUMENT_REGION();
1836
1837
Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
1838
Size size = map1.size();
1839
const Mat *m1 = &map1, *m2 = &map2;
1840
int m1type = m1->type(), m2type = m2->type();
1841
1842
CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
1843
(m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
1844
(m1type == CV_32FC1 && m2type == CV_32FC1) ||
1845
(m1type == CV_32FC2 && m2->empty()) );
1846
1847
if( m2type == CV_16SC2 )
1848
{
1849
std::swap( m1, m2 );
1850
std::swap( m1type, m2type );
1851
}
1852
1853
if( dstm1type <= 0 )
1854
dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
1855
CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
1856
_dstmap1.create( size, dstm1type );
1857
dstmap1 = _dstmap1.getMat();
1858
1859
if( !nninterpolate && dstm1type != CV_32FC2 )
1860
{
1861
_dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
1862
dstmap2 = _dstmap2.getMat();
1863
}
1864
else
1865
_dstmap2.release();
1866
1867
if( m1type == dstm1type || (nninterpolate &&
1868
((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
1869
(m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
1870
{
1871
m1->convertTo( dstmap1, dstmap1.type() );
1872
if( !dstmap2.empty() && dstmap2.type() == m2->type() )
1873
m2->copyTo( dstmap2 );
1874
return;
1875
}
1876
1877
if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
1878
{
1879
Mat vdata[] = { *m1, *m2 };
1880
merge( vdata, 2, dstmap1 );
1881
return;
1882
}
1883
1884
if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
1885
{
1886
Mat mv[] = { dstmap1, dstmap2 };
1887
split( *m1, mv );
1888
return;
1889
}
1890
1891
if( m1->isContinuous() && (m2->empty() || m2->isContinuous()) &&
1892
dstmap1.isContinuous() && (dstmap2.empty() || dstmap2.isContinuous()) )
1893
{
1894
size.width *= size.height;
1895
size.height = 1;
1896
}
1897
1898
#if CV_SIMD128
1899
bool useSIMD = hasSIMD128();
1900
#endif
1901
#if CV_TRY_SSE4_1
1902
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
1903
#endif
1904
1905
const float scale = 1.f/INTER_TAB_SIZE;
1906
int x, y;
1907
for( y = 0; y < size.height; y++ )
1908
{
1909
const float* src1f = m1->ptr<float>(y);
1910
const float* src2f = m2->ptr<float>(y);
1911
const short* src1 = (const short*)src1f;
1912
const ushort* src2 = (const ushort*)src2f;
1913
1914
float* dst1f = dstmap1.ptr<float>(y);
1915
float* dst2f = dstmap2.ptr<float>(y);
1916
short* dst1 = (short*)dst1f;
1917
ushort* dst2 = (ushort*)dst2f;
1918
x = 0;
1919
1920
if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
1921
{
1922
if( nninterpolate )
1923
{
1924
#if CV_TRY_SSE4_1
1925
if (useSSE4_1)
1926
opt_SSE4_1::convertMaps_nninterpolate32f1c16s_SSE41(src1f, src2f, dst1, size.width);
1927
else
1928
#endif
1929
{
1930
#if CV_SIMD128
1931
if( useSIMD )
1932
{
1933
int span = v_int16x8::nlanes;
1934
for( ; x <= size.width - span; x += span )
1935
{
1936
v_int16x8 v_dst[2];
1937
#define CV_PACK_MAP(X) v_pack(v_round(v_load(X)), v_round(v_load((X)+4)))
1938
v_dst[0] = CV_PACK_MAP(src1f + x);
1939
v_dst[1] = CV_PACK_MAP(src2f + x);
1940
#undef CV_PACK_MAP
1941
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
1942
}
1943
}
1944
#endif
1945
for( ; x < size.width; x++ )
1946
{
1947
dst1[x*2] = saturate_cast<short>(src1f[x]);
1948
dst1[x*2+1] = saturate_cast<short>(src2f[x]);
1949
}
1950
}
1951
}
1952
else
1953
{
1954
#if CV_TRY_SSE4_1
1955
if (useSSE4_1)
1956
opt_SSE4_1::convertMaps_32f1c16s_SSE41(src1f, src2f, dst1, dst2, size.width);
1957
else
1958
#endif
1959
{
1960
#if CV_SIMD128
1961
if( useSIMD )
1962
{
1963
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
1964
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
1965
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
1966
int span = v_float32x4::nlanes;
1967
for( ; x <= size.width - span * 2; x += span * 2 )
1968
{
1969
v_int32x4 v_ix0 = v_round(v_scale * (v_load(src1f + x)));
1970
v_int32x4 v_ix1 = v_round(v_scale * (v_load(src1f + x + span)));
1971
v_int32x4 v_iy0 = v_round(v_scale * (v_load(src2f + x)));
1972
v_int32x4 v_iy1 = v_round(v_scale * (v_load(src2f + x + span)));
1973
1974
v_int16x8 v_dst[2];
1975
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
1976
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
1977
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
1978
1979
v_int32x4 v_dst0 = v_muladd(v_scale3, (v_iy0 & v_mask), (v_ix0 & v_mask));
1980
v_int32x4 v_dst1 = v_muladd(v_scale3, (v_iy1 & v_mask), (v_ix1 & v_mask));
1981
v_store(dst2 + x, v_pack_u(v_dst0, v_dst1));
1982
}
1983
}
1984
#endif
1985
for( ; x < size.width; x++ )
1986
{
1987
int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
1988
int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
1989
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
1990
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
1991
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
1992
}
1993
}
1994
}
1995
}
1996
else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
1997
{
1998
if( nninterpolate )
1999
{
2000
#if CV_SIMD128
2001
int span = v_float32x4::nlanes;
2002
if( useSIMD )
2003
for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
2004
v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
2005
v_round(v_load(src1f + x + span))));
2006
#endif
2007
for( ; x < size.width; x++ )
2008
{
2009
dst1[x*2] = saturate_cast<short>(src1f[x*2]);
2010
dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
2011
}
2012
}
2013
else
2014
{
2015
#if CV_TRY_SSE4_1
2016
if( useSSE4_1 )
2017
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
2018
else
2019
#endif
2020
{
2021
#if CV_SIMD128
2022
if( useSIMD )
2023
{
2024
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
2025
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
2026
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
2027
int span = v_uint16x8::nlanes;
2028
for (; x <= size.width - span; x += span )
2029
{
2030
v_float32x4 v_src0[2], v_src1[2];
2031
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
2032
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
2033
v_int32x4 v_ix0 = v_round(v_src0[0] * v_scale);
2034
v_int32x4 v_ix1 = v_round(v_src1[0] * v_scale);
2035
v_int32x4 v_iy0 = v_round(v_src0[1] * v_scale);
2036
v_int32x4 v_iy1 = v_round(v_src1[1] * v_scale);
2037
2038
v_int16x8 v_dst[2];
2039
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
2040
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
2041
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
2042
2043
v_store(dst2 + x, v_pack_u(
2044
v_muladd(v_scale3, (v_iy0 & v_mask), (v_ix0 & v_mask)),
2045
v_muladd(v_scale3, (v_iy1 & v_mask), (v_ix1 & v_mask))));
2046
}
2047
}
2048
#endif
2049
for( ; x < size.width; x++ )
2050
{
2051
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
2052
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
2053
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
2054
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
2055
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2056
}
2057
}
2058
}
2059
}
2060
else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
2061
{
2062
#if CV_SIMD128
2063
if( useSIMD )
2064
{
2065
v_uint16x8 v_mask2 = v_setall_u16(INTER_TAB_SIZE2-1);
2066
v_uint32x4 v_zero = v_setzero_u32(), v_mask = v_setall_u32(INTER_TAB_SIZE-1);
2067
v_float32x4 v_scale = v_setall_f32(scale);
2068
int span = v_float32x4::nlanes;
2069
for( ; x <= size.width - span * 2; x += span * 2 )
2070
{
2071
v_uint32x4 v_fxy1, v_fxy2;
2072
if ( src2 )
2073
{
2074
v_uint16x8 v_src2 = v_load(src2 + x) & v_mask2;
2075
v_expand(v_src2, v_fxy1, v_fxy2);
2076
}
2077
else
2078
v_fxy1 = v_fxy2 = v_zero;
2079
2080
v_int16x8 v_src[2];
2081
v_int32x4 v_src0[2], v_src1[2];
2082
v_load_deinterleave(src1 + (x << 1), v_src[0], v_src[1]);
2083
v_expand(v_src[0], v_src0[0], v_src0[1]);
2084
v_expand(v_src[1], v_src1[0], v_src1[1]);
2085
#define CV_COMPUTE_MAP_X(X, FXY) v_muladd(v_scale, v_cvt_f32(v_reinterpret_as_s32((FXY) & v_mask)),\
2086
v_cvt_f32(v_reinterpret_as_s32(X)))
2087
#define CV_COMPUTE_MAP_Y(Y, FXY) v_muladd(v_scale, v_cvt_f32(v_reinterpret_as_s32((FXY) >> INTER_BITS)),\
2088
v_cvt_f32(v_reinterpret_as_s32(Y)))
2089
v_float32x4 v_dst1 = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1);
2090
v_float32x4 v_dst2 = CV_COMPUTE_MAP_Y(v_src1[0], v_fxy1);
2091
v_store(dst1f + x, v_dst1);
2092
v_store(dst2f + x, v_dst2);
2093
2094
v_dst1 = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2);
2095
v_dst2 = CV_COMPUTE_MAP_Y(v_src1[1], v_fxy2);
2096
v_store(dst1f + x + span, v_dst1);
2097
v_store(dst2f + x + span, v_dst2);
2098
#undef CV_COMPUTE_MAP_X
2099
#undef CV_COMPUTE_MAP_Y
2100
}
2101
}
2102
#endif
2103
for( ; x < size.width; x++ )
2104
{
2105
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
2106
dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2107
dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2108
}
2109
}
2110
else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
2111
{
2112
#if CV_SIMD128
2113
if( useSIMD )
2114
{
2115
v_int16x8 v_mask2 = v_setall_s16(INTER_TAB_SIZE2-1);
2116
v_int32x4 v_zero = v_setzero_s32(), v_mask = v_setall_s32(INTER_TAB_SIZE-1);
2117
v_float32x4 v_scale = v_setall_f32(scale);
2118
int span = v_int16x8::nlanes;
2119
for( ; x <= size.width - span; x += span )
2120
{
2121
v_int32x4 v_fxy1, v_fxy2;
2122
if (src2)
2123
{
2124
v_int16x8 v_src2 = v_load((short *)src2 + x) & v_mask2;
2125
v_expand(v_src2, v_fxy1, v_fxy2);
2126
}
2127
else
2128
v_fxy1 = v_fxy2 = v_zero;
2129
2130
v_int16x8 v_src[2];
2131
v_int32x4 v_src0[2], v_src1[2];
2132
v_float32x4 v_dst[2];
2133
v_load_deinterleave(src1 + (x << 1), v_src[0], v_src[1]);
2134
v_expand(v_src[0], v_src0[0], v_src0[1]);
2135
v_expand(v_src[1], v_src1[0], v_src1[1]);
2136
2137
#define CV_COMPUTE_MAP_X(X, FXY) v_muladd(v_scale, v_cvt_f32((FXY) & v_mask), v_cvt_f32(X))
2138
#define CV_COMPUTE_MAP_Y(Y, FXY) v_muladd(v_scale, v_cvt_f32((FXY) >> INTER_BITS), v_cvt_f32(Y))
2139
v_dst[0] = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1);
2140
v_dst[1] = CV_COMPUTE_MAP_Y(v_src1[0], v_fxy1);
2141
v_store_interleave(dst1f + (x << 1), v_dst[0], v_dst[1]);
2142
2143
v_dst[0] = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2);
2144
v_dst[1] = CV_COMPUTE_MAP_Y(v_src1[1], v_fxy2);
2145
v_store_interleave(dst1f + (x << 1) + span, v_dst[0], v_dst[1]);
2146
#undef CV_COMPUTE_MAP_X
2147
#undef CV_COMPUTE_MAP_Y
2148
}
2149
}
2150
#endif
2151
for( ; x < size.width; x++ )
2152
{
2153
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
2154
dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2155
dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2156
}
2157
}
2158
else
2159
CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
2160
}
2161
}
2162
2163
2164
namespace cv
2165
{
2166
2167
class WarpAffineInvoker :
2168
public ParallelLoopBody
2169
{
2170
public:
2171
WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
2172
const Scalar &_borderValue, int *_adelta, int *_bdelta, const double *_M) :
2173
ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
2174
borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
2175
M(_M)
2176
{
2177
}
2178
2179
virtual void operator() (const Range& range) const CV_OVERRIDE
2180
{
2181
const int BLOCK_SZ = 64;
2182
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
2183
const int AB_BITS = MAX(10, (int)INTER_BITS);
2184
const int AB_SCALE = 1 << AB_BITS;
2185
int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
2186
#if CV_TRY_AVX2
2187
bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
2188
#endif
2189
#if CV_SIMD128
2190
bool useSIMD = hasSIMD128();
2191
#endif
2192
#if CV_TRY_SSE4_1
2193
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
2194
#endif
2195
2196
int bh0 = std::min(BLOCK_SZ/2, dst.rows);
2197
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
2198
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, dst.rows);
2199
2200
for( y = range.start; y < range.end; y += bh0 )
2201
{
2202
for( x = 0; x < dst.cols; x += bw0 )
2203
{
2204
int bw = std::min( bw0, dst.cols - x);
2205
int bh = std::min( bh0, range.end - y);
2206
2207
Mat _XY(bh, bw, CV_16SC2, XY), matA;
2208
Mat dpart(dst, Rect(x, y, bw, bh));
2209
2210
for( y1 = 0; y1 < bh; y1++ )
2211
{
2212
short* xy = XY + y1*bw*2;
2213
int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
2214
int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
2215
2216
if( interpolation == INTER_NEAREST )
2217
{
2218
x1 = 0;
2219
#if CV_TRY_SSE4_1
2220
if( useSSE4_1 )
2221
opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta + x, bdelta + x, xy, X0, Y0, bw);
2222
else
2223
#endif
2224
{
2225
#if CV_SIMD128
2226
if( useSIMD )
2227
{
2228
v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
2229
int span = v_uint16x8::nlanes;
2230
for( ; x1 <= bw - span; x1 += span )
2231
{
2232
v_int16x8 v_dst[2];
2233
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(shift+v_load(ptr + offset)),\
2234
v_shr<AB_BITS>(shift+v_load(ptr + offset + 4)))
2235
v_dst[0] = CV_CONVERT_MAP(adelta, x+x1, v_X0);
2236
v_dst[1] = CV_CONVERT_MAP(bdelta, x+x1, v_Y0);
2237
#undef CV_CONVERT_MAP
2238
v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
2239
}
2240
}
2241
#endif
2242
for( ; x1 < bw; x1++ )
2243
{
2244
int X = (X0 + adelta[x+x1]) >> AB_BITS;
2245
int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
2246
xy[x1*2] = saturate_cast<short>(X);
2247
xy[x1*2+1] = saturate_cast<short>(Y);
2248
}
2249
}
2250
}
2251
else
2252
{
2253
short* alpha = A + y1*bw;
2254
x1 = 0;
2255
#if CV_TRY_AVX2
2256
if ( useAVX2 )
2257
x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
2258
#endif
2259
#if CV_SIMD128
2260
if( useSIMD )
2261
{
2262
v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
2263
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
2264
int span = v_float32x4::nlanes;
2265
for( ; x1 <= bw - span * 2; x1 += span * 2 )
2266
{
2267
v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v__X0 + v_load(adelta + x + x1));
2268
v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v__Y0 + v_load(bdelta + x + x1));
2269
v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v__X0 + v_load(adelta + x + x1 + span));
2270
v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v__Y0 + v_load(bdelta + x + x1 + span));
2271
2272
v_int16x8 v_xy[2];
2273
v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
2274
v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
2275
v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);
2276
2277
v_int32x4 v_alpha0 = v_shl<INTER_BITS>(v_Y0 & v_mask) | (v_X0 & v_mask);
2278
v_int32x4 v_alpha1 = v_shl<INTER_BITS>(v_Y1 & v_mask) | (v_X1 & v_mask);
2279
v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
2280
}
2281
}
2282
#endif
2283
for( ; x1 < bw; x1++ )
2284
{
2285
int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
2286
int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
2287
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
2288
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
2289
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
2290
(X & (INTER_TAB_SIZE-1)));
2291
}
2292
}
2293
}
2294
2295
if( interpolation == INTER_NEAREST )
2296
remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
2297
else
2298
{
2299
Mat _matA(bh, bw, CV_16U, A);
2300
remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
2301
}
2302
}
2303
}
2304
}
2305
2306
private:
2307
Mat src;
2308
Mat dst;
2309
int interpolation, borderType;
2310
Scalar borderValue;
2311
int *adelta, *bdelta;
2312
const double *M;
2313
};
2314
2315
2316
#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPAFFINE
typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);

// Parallel body that delegates warpAffine of a horizontal destination strip to
// an IPP function.  On any IPP failure *ok is set to false so the caller can
// fall back to the generic implementation.
class IPPWarpAffineInvoker :
    public ParallelLoopBody
{
public:
    IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType,
                         const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        IppiSize srcsize = { src.cols, src.rows };
        IppiRect srcroi = { 0, 0, src.cols, src.rows };
        IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
        int cnn = src.channels();
        if( borderType == BORDER_CONSTANT )
        {
            // Pre-fill the strip with the border value; the IPP call only
            // writes pixels that map inside the source.
            IppiSize setSize = { dst.cols, range.end - range.start };
            void *dataPointer = dst.ptr(range.start);
            if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
            {
                *ok = false;
                return;
            }
        }

        // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
        IppStatus status = CV_INSTRUMENT_FUN_IPP(func,( src.ptr(), srcsize, (int)src.step[0], srcroi, dst.ptr(),
                                                        (int)dst.step[0], dstroi, coeffs, mode ));
        if( status < 0)
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }
private:
    Mat &src;
    Mat &dst;
    int mode;
    double (&coeffs)[2][3];
    int borderType;
    Scalar borderValue;
    ippiWarpAffineBackFunc func;
    bool *ok;                   // shared success flag, written by all strips
    const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&);
};
#endif
2370
2371
#ifdef HAVE_OPENCL

enum { OCL_OP_PERSPECTIVE = 1, OCL_OP_AFFINE = 0 };

// Specialized OpenCL path for Intel devices: CV_8UC1 src/dst with dst width a
// multiple of 4, constant border and nearest/linear/cubic interpolation.
// Returns false when the preconditions are not met or the kernel cannot be
// built, so the caller can fall through to the generic path.
static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArray _M0,
                                    Size dsize, int flags, int borderType, const Scalar& borderValue,
                                    int op_type)
{
    CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), dtype = _dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    int interpolation = flags & INTER_MAX;
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    if ( !dev.isIntel() || !(type == CV_8UC1) ||
         !(dtype == CV_8UC1) || !(_dst.cols() % 4 == 0) ||
         !(borderType == cv::BORDER_CONSTANT &&
          (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)))
        return false;

    const char * const warp_op[2] = { "Affine", "Perspective" };
    const char * const interpolationMap[3] = { "nearest", "linear", "cubic" };
    ocl::ProgramSource program = ocl::imgproc::warp_transform_oclsrc;
    String kernelName = format("warp%s_%s_8u", warp_op[op_type], interpolationMap[interpolation]);

    bool is32f = (interpolation == INTER_CUBIC || interpolation == INTER_LINEAR) && op_type == OCL_OP_AFFINE;
    int wdepth = interpolation == INTER_NEAREST ? depth : std::max(is32f ? CV_32F : CV_32S, depth);
    int sctype = CV_MAKETYPE(wdepth, cn);

    ocl::Kernel k;
    String opts = format("-D ST=%s", ocl::typeToStr(sctype));

    k.create(kernelName.c_str(), program, opts);
    if (k.empty())
        return false;

    float borderBuf[] = { 0, 0, 0, 0 };
    scalarToRawData(borderValue, borderBuf, sctype);

    UMat src = _src.getUMat(), M0;
    _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
    UMat dst = _dst.getUMat();

    float M[9] = {0};
    int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
    Mat matM(matRows, 3, CV_32F, M), M1 = _M0.getMat();
    CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) && M1.rows == matRows && M1.cols == 3 );
    M1.convertTo(matM, matM.type());

    // The kernel expects the inverse (dst->src) transform; invert unless the
    // caller already supplied it via WARP_INVERSE_MAP.
    if( !(flags & WARP_INVERSE_MAP) )
    {
        if (op_type == OCL_OP_PERSPECTIVE)
            invert(matM, matM);
        else
        {
            // In-place closed-form inversion of the 2x3 affine matrix.
            float D = M[0]*M[4] - M[1]*M[3];
            D = D != 0 ? 1.f/D : 0;
            float A11 = M[4]*D, A22=M[0]*D;
            M[0] = A11; M[1] *= -D;
            M[3] *= -D; M[4] = A22;
            float b1 = -M[0]*M[2] - M[1]*M[5];
            float b2 = -M[3]*M[2] - M[4]*M[5];
            M[2] = b1; M[5] = b2;
        }
    }
    matM.convertTo(M0, CV_32F);

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
           ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));

    size_t globalThreads[2];
    globalThreads[0] = (size_t)(dst.cols / 4);      // each work-item covers 4 columns
    globalThreads[1] = (size_t)dst.rows;

    return k.run(2, globalThreads, NULL, false);
}
2449
2450
static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
2451
Size dsize, int flags, int borderType, const Scalar& borderValue,
2452
int op_type)
2453
{
2454
CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
2455
const ocl::Device & dev = ocl::Device::getDefault();
2456
2457
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2458
const bool doubleSupport = dev.doubleFPConfig() > 0;
2459
2460
int interpolation = flags & INTER_MAX;
2461
if( interpolation == INTER_AREA )
2462
interpolation = INTER_LINEAR;
2463
int rowsPerWI = dev.isIntel() && op_type == OCL_OP_AFFINE && interpolation <= INTER_LINEAR ? 4 : 1;
2464
2465
if ( !(borderType == cv::BORDER_CONSTANT &&
2466
(interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)) ||
2467
(!doubleSupport && depth == CV_64F) || cn > 4)
2468
return false;
2469
2470
bool useDouble = depth == CV_64F;
2471
2472
const char * const interpolationMap[3] = { "NEAREST", "LINEAR", "CUBIC" };
2473
ocl::ProgramSource program = op_type == OCL_OP_AFFINE ?
2474
ocl::imgproc::warp_affine_oclsrc : ocl::imgproc::warp_perspective_oclsrc;
2475
const char * const kernelName = op_type == OCL_OP_AFFINE ? "warpAffine" : "warpPerspective";
2476
2477
int scalarcn = cn == 3 ? 4 : cn;
2478
bool is32f = !dev.isAMD() && (interpolation == INTER_CUBIC || interpolation == INTER_LINEAR) && op_type == OCL_OP_AFFINE;
2479
int wdepth = interpolation == INTER_NEAREST ? depth : std::max(is32f ? CV_32F : CV_32S, depth);
2480
int sctype = CV_MAKETYPE(wdepth, scalarcn);
2481
2482
ocl::Kernel k;
2483
String opts;
2484
if (interpolation == INTER_NEAREST)
2485
{
2486
opts = format("-D INTER_NEAREST -D T=%s%s -D CT=%s -D T1=%s -D ST=%s -D cn=%d -D rowsPerWI=%d",
2487
ocl::typeToStr(type),
2488
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2489
useDouble ? "double" : "float",
2490
ocl::typeToStr(CV_MAT_DEPTH(type)),
2491
ocl::typeToStr(sctype), cn, rowsPerWI);
2492
}
2493
else
2494
{
2495
char cvt[2][50];
2496
opts = format("-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D depth=%d"
2497
" -D convertToWT=%s -D convertToT=%s%s -D CT=%s -D cn=%d -D rowsPerWI=%d",
2498
interpolationMap[interpolation], ocl::typeToStr(type),
2499
ocl::typeToStr(CV_MAT_DEPTH(type)),
2500
ocl::typeToStr(sctype),
2501
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), depth,
2502
ocl::convertTypeStr(depth, wdepth, cn, cvt[0]),
2503
ocl::convertTypeStr(wdepth, depth, cn, cvt[1]),
2504
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2505
useDouble ? "double" : "float",
2506
cn, rowsPerWI);
2507
}
2508
2509
k.create(kernelName, program, opts);
2510
if (k.empty())
2511
return false;
2512
2513
double borderBuf[] = { 0, 0, 0, 0 };
2514
scalarToRawData(borderValue, borderBuf, sctype);
2515
2516
UMat src = _src.getUMat(), M0;
2517
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
2518
UMat dst = _dst.getUMat();
2519
2520
double M[9] = {0};
2521
int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
2522
Mat matM(matRows, 3, CV_64F, M), M1 = _M0.getMat();
2523
CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) &&
2524
M1.rows == matRows && M1.cols == 3 );
2525
M1.convertTo(matM, matM.type());
2526
2527
if( !(flags & WARP_INVERSE_MAP) )
2528
{
2529
if (op_type == OCL_OP_PERSPECTIVE)
2530
invert(matM, matM);
2531
else
2532
{
2533
double D = M[0]*M[4] - M[1]*M[3];
2534
D = D != 0 ? 1./D : 0;
2535
double A11 = M[4]*D, A22=M[0]*D;
2536
M[0] = A11; M[1] *= -D;
2537
M[3] *= -D; M[4] = A22;
2538
double b1 = -M[0]*M[2] - M[1]*M[5];
2539
double b2 = -M[3]*M[2] - M[4]*M[5];
2540
M[2] = b1; M[5] = b2;
2541
}
2542
}
2543
matM.convertTo(M0, useDouble ? CV_64F : CV_32F);
2544
2545
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
2546
ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
2547
2548
size_t globalThreads[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
2549
return k.run(2, globalThreads, NULL, false);
2550
}
2551
2552
#endif
2553
2554
namespace hal {
2555
2556
void warpAffine(int src_type,
2557
const uchar * src_data, size_t src_step, int src_width, int src_height,
2558
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
2559
const double M[6], int interpolation, int borderType, const double borderValue[4])
2560
{
2561
CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
2562
2563
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
2564
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
2565
2566
int x;
2567
AutoBuffer<int> _abdelta(dst.cols*2);
2568
int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
2569
const int AB_BITS = MAX(10, (int)INTER_BITS);
2570
const int AB_SCALE = 1 << AB_BITS;
2571
2572
for( x = 0; x < dst.cols; x++ )
2573
{
2574
adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
2575
bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
2576
}
2577
2578
Range range(0, dst.rows);
2579
WarpAffineInvoker invoker(src, dst, interpolation, borderType,
2580
Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]),
2581
adelta, bdelta, M);
2582
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2583
}
2584
2585
} // hal::
2586
} // cv::
2587
2588
2589
// Public warpAffine: validates arguments, tries the OpenCL paths, inverts the
// matrix when WARP_INVERSE_MAP is not given, optionally dispatches to IPP, and
// finally falls through to hal::warpAffine.
void cv::warpAffine( InputArray _src, OutputArray _dst,
                     InputArray _M0, Size dsize,
                     int flags, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    int interpolation = flags & INTER_MAX;
    CV_Assert( _src.channels() <= 4 || (interpolation != INTER_LANCZOS4 &&
                                        interpolation != INTER_CUBIC) );

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
               _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
               ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType,
                                       borderValue, OCL_OP_AFFINE))

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType,
                                 borderValue, OCL_OP_AFFINE))

    Mat src = _src.getMat(), M0 = _M0.getMat();
    _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
    Mat dst = _dst.getMat();
    CV_Assert( src.cols > 0 && src.rows > 0 );
    // In-place call: the warp reads arbitrary source pixels, so work on a copy.
    if( dst.data == src.data )
        src = src.clone();

    double M[6] = {0};
    Mat matM(2, 3, CV_64F, M);
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
    M0.convertTo(matM, matM.type());

    // The remap machinery needs the inverse (dst->src) transform.
    if( !(flags & WARP_INVERSE_MAP) )
    {
        double D = M[0]*M[4] - M[1]*M[3];
        D = D != 0 ? 1./D : 0;
        double A11 = M[4]*D, A22=M[0]*D;
        M[0] = A11; M[1] *= -D;
        M[3] *= -D; M[4] = A22;
        double b1 = -M[0]*M[2] - M[1]*M[5];
        double b2 = -M[3]*M[2] - M[4]*M[5];
        M[2] = b1; M[5] = b2;
    }

#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPAFFINE
    CV_IPP_CHECK()
    {
        int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
        if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
           ( cn == 1 || cn == 3 || cn == 4 ) &&
           ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) &&
           ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) )
        {
            ippiWarpAffineBackFunc ippFunc = 0;
            if ((flags & WARP_INVERSE_MAP) != 0)
            {
                ippFunc =
                type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
                type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
                type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
                type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
                type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
                type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
                type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
                type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
                type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
                0;
            }
            else
            {
                ippFunc =
                type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R :
                type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R :
                type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R :
                type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R :
                type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R :
                type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R :
                type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R :
                type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R :
                type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R :
                0;
            }
            int mode =
            interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
            interpolation == INTER_NEAREST ? IPPI_INTER_NN :
            interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
            0;
            CV_Assert(mode && ippFunc);

            double coeffs[2][3];
            for( int i = 0; i < 2; i++ )
                for( int j = 0; j < 3; j++ )
                    coeffs[i][j] = matM.at<double>(i, j);

            bool ok;
            Range range(0, dst.rows);
            IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
            if( ok )
            {
                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                return;
            }
            // IPP failed somewhere: fall through to the generic implementation.
            setIppErrorStatus();
        }
    }
#endif

    hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                    M, interpolation, borderType, borderValue.val);
}
2702
2703
2704
namespace cv
2705
{
2706
2707
class WarpPerspectiveInvoker :
2708
public ParallelLoopBody
2709
{
2710
public:
2711
WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, const double *_M, int _interpolation,
2712
int _borderType, const Scalar &_borderValue) :
2713
ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
2714
borderType(_borderType), borderValue(_borderValue)
2715
{
2716
#if defined(_MSC_VER) && _MSC_VER == 1800 /* MSVS 2013 */ && CV_AVX
2717
// details: https://github.com/opencv/opencv/issues/11026
2718
borderValue.val[2] = _borderValue.val[2];
2719
borderValue.val[3] = _borderValue.val[3];
2720
#endif
2721
}
2722
2723
virtual void operator() (const Range& range) const CV_OVERRIDE
2724
{
2725
const int BLOCK_SZ = 32;
2726
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
2727
int x, y, x1, y1, width = dst.cols, height = dst.rows;
2728
2729
int bh0 = std::min(BLOCK_SZ/2, height);
2730
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
2731
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
2732
2733
#if CV_TRY_SSE4_1
2734
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
2735
if(CV_CPU_HAS_SUPPORT_SSE4_1)
2736
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
2737
#endif
2738
2739
for( y = range.start; y < range.end; y += bh0 )
2740
{
2741
for( x = 0; x < width; x += bw0 )
2742
{
2743
int bw = std::min( bw0, width - x);
2744
int bh = std::min( bh0, range.end - y); // height
2745
2746
Mat _XY(bh, bw, CV_16SC2, XY), matA;
2747
Mat dpart(dst, Rect(x, y, bw, bh));
2748
2749
for( y1 = 0; y1 < bh; y1++ )
2750
{
2751
short* xy = XY + y1*bw*2;
2752
double X0 = M[0]*x + M[1]*(y + y1) + M[2];
2753
double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
2754
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
2755
2756
if( interpolation == INTER_NEAREST )
2757
{
2758
x1 = 0;
2759
2760
#if CV_TRY_SSE4_1
2761
if (pwarp_impl_sse4)
2762
pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
2763
else
2764
#endif
2765
for( ; x1 < bw; x1++ )
2766
{
2767
double W = W0 + M[6]*x1;
2768
W = W ? 1./W : 0;
2769
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
2770
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
2771
int X = saturate_cast<int>(fX);
2772
int Y = saturate_cast<int>(fY);
2773
2774
xy[x1*2] = saturate_cast<short>(X);
2775
xy[x1*2+1] = saturate_cast<short>(Y);
2776
}
2777
}
2778
else
2779
{
2780
short* alpha = A + y1*bw;
2781
x1 = 0;
2782
2783
#if CV_TRY_SSE4_1
2784
if (pwarp_impl_sse4)
2785
pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
2786
else
2787
#endif
2788
for( ; x1 < bw; x1++ )
2789
{
2790
double W = W0 + M[6]*x1;
2791
W = W ? INTER_TAB_SIZE/W : 0;
2792
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
2793
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
2794
int X = saturate_cast<int>(fX);
2795
int Y = saturate_cast<int>(fY);
2796
2797
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
2798
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
2799
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
2800
(X & (INTER_TAB_SIZE-1)));
2801
}
2802
}
2803
}
2804
2805
if( interpolation == INTER_NEAREST )
2806
remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
2807
else
2808
{
2809
Mat _matA(bh, bw, CV_16U, A);
2810
remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
2811
}
2812
}
2813
}
2814
}
2815
2816
private:
2817
Mat src;
2818
Mat dst;
2819
const double* M;
2820
int interpolation, borderType;
2821
Scalar borderValue;
2822
};
2823
2824
#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPPERSPECTIVE
typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);

// Parallel body that delegates warpPerspective of a horizontal destination
// strip to an IPP function.  On any IPP failure *ok is set to false so the
// caller can fall back to the generic implementation.
class IPPWarpPerspectiveInvoker :
    public ParallelLoopBody
{
public:
    IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation,
                              int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) :
        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        IppiSize srcsize = {src.cols, src.rows};
        IppiRect srcroi = {0, 0, src.cols, src.rows};
        IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
        int cnn = src.channels();

        if( borderType == BORDER_CONSTANT )
        {
            // Pre-fill the strip with the border value; the IPP call only
            // writes pixels that map inside the source.
            IppiSize setSize = {dst.cols, range.end - range.start};
            void *dataPointer = dst.ptr(range.start);
            if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
            {
                *ok = false;
                return;
            }
        }

        // Fixed: a stray ';' after src.ptr() had been breaking the call expression.
        IppStatus status = CV_INSTRUMENT_FUN_IPP(func,(src.ptr(), srcsize, (int)src.step[0], srcroi, dst.ptr(), (int)dst.step[0], dstroi, coeffs, mode));
        if (status != ippStsNoErr)
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }
private:
    Mat &src;
    Mat &dst;
    int mode;
    double (&coeffs)[3][3];
    int borderType;
    const Scalar borderValue;
    ippiWarpPerspectiveFunc func;
    bool *ok;                   // shared success flag, written by all strips

    const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&);
};
#endif
2878
2879
namespace hal {
2880
2881
void warpPerspective(int src_type,
2882
const uchar * src_data, size_t src_step, int src_width, int src_height,
2883
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
2884
const double M[9], int interpolation, int borderType, const double borderValue[4])
2885
{
2886
CALL_HAL(warpPerspective, cv_hal_warpPerspective, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
2887
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
2888
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
2889
2890
Range range(0, dst.rows);
2891
WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]));
2892
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2893
}
2894
2895
} // hal::
2896
} // cv::
2897
2898
void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
2899
Size dsize, int flags, int borderType, const Scalar& borderValue )
2900
{
2901
CV_INSTRUMENT_REGION();
2902
2903
CV_Assert( _src.total() > 0 );
2904
2905
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
2906
_src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
2907
ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType, borderValue,
2908
OCL_OP_PERSPECTIVE))
2909
2910
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
2911
ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType, borderValue,
2912
OCL_OP_PERSPECTIVE))
2913
2914
Mat src = _src.getMat(), M0 = _M0.getMat();
2915
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
2916
Mat dst = _dst.getMat();
2917
2918
if( dst.data == src.data )
2919
src = src.clone();
2920
2921
double M[9];
2922
Mat matM(3, 3, CV_64F, M);
2923
int interpolation = flags & INTER_MAX;
2924
if( interpolation == INTER_AREA )
2925
interpolation = INTER_LINEAR;
2926
2927
CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
2928
M0.convertTo(matM, matM.type());
2929
2930
#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPPERSPECTIVE
2931
CV_IPP_CHECK()
2932
{
2933
int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2934
if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) &&
2935
(cn == 1 || cn == 3 || cn == 4) &&
2936
( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) &&
2937
(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC))
2938
{
2939
ippiWarpPerspectiveFunc ippFunc = 0;
2940
if ((flags & WARP_INVERSE_MAP) != 0)
2941
{
2942
ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R :
2943
type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R :
2944
type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R :
2945
type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R :
2946
type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R :
2947
type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R :
2948
type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R :
2949
type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R :
2950
type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0;
2951
}
2952
else
2953
{
2954
ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R :
2955
type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R :
2956
type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R :
2957
type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R :
2958
type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R :
2959
type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R :
2960
type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R :
2961
type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R :
2962
type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0;
2963
}
2964
int mode =
2965
interpolation == INTER_NEAREST ? IPPI_INTER_NN :
2966
interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
2967
interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0;
2968
CV_Assert(mode && ippFunc);
2969
2970
double coeffs[3][3];
2971
for( int i = 0; i < 3; i++ )
2972
for( int j = 0; j < 3; j++ )
2973
coeffs[i][j] = matM.at<double>(i, j);
2974
2975
bool ok;
2976
Range range(0, dst.rows);
2977
IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
2978
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2979
if( ok )
2980
{
2981
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2982
return;
2983
}
2984
setIppErrorStatus();
2985
}
2986
}
2987
#endif
2988
2989
if( !(flags & WARP_INVERSE_MAP) )
2990
invert(matM, matM);
2991
2992
hal::warpPerspective(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
2993
matM.ptr<double>(), interpolation, borderType, borderValue.val);
2994
}
2995
2996
2997
// Builds the 2x3 affine matrix for rotation by `angle` degrees (CCW) about
// `center`, combined with isotropic scaling by `scale`:
//   [  alpha  beta  (1-alpha)*cx - beta*cy ]
//   [ -beta   alpha  beta*cx + (1-alpha)*cy ]
// where alpha = scale*cos(angle), beta = scale*sin(angle).
cv::Mat cv::getRotationMatrix2D( Point2f center, double angle, double scale )
{
    CV_INSTRUMENT_REGION();

    angle *= CV_PI/180;  // degrees -> radians
    double alpha = std::cos(angle)*scale;
    double beta = std::sin(angle)*scale;

    Mat M(2, 3, CV_64F);
    double* m = M.ptr<double>();

    m[0] = alpha;
    m[1] = beta;
    m[2] = (1-alpha)*center.x - beta*center.y;
    m[3] = -beta;
    m[4] = alpha;
    m[5] = beta*center.x + (1-alpha)*center.y;

    return M;
}
3017
3018
/* Calculates coefficients of perspective transformation
3019
* which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
3020
*
3021
* c00*xi + c01*yi + c02
3022
* ui = ---------------------
3023
* c20*xi + c21*yi + c22
3024
*
3025
* c10*xi + c11*yi + c12
3026
* vi = ---------------------
3027
* c20*xi + c21*yi + c22
3028
*
3029
* Coefficients are calculated by solving linear system:
3030
* / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
3031
* | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
3032
* | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
3033
* | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
3034
* | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
3035
* | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
3036
* | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
3037
* \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
3038
*
3039
* where:
3040
* cij - matrix coefficients, c22 = 1
3041
*/
3042
cv::Mat cv::getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod)
3043
{
3044
CV_INSTRUMENT_REGION();
3045
3046
Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.ptr());
3047
double a[8][8], b[8];
3048
Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
3049
3050
for( int i = 0; i < 4; ++i )
3051
{
3052
a[i][0] = a[i+4][3] = src[i].x;
3053
a[i][1] = a[i+4][4] = src[i].y;
3054
a[i][2] = a[i+4][5] = 1;
3055
a[i][3] = a[i][4] = a[i][5] =
3056
a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
3057
a[i][6] = -src[i].x*dst[i].x;
3058
a[i][7] = -src[i].y*dst[i].x;
3059
a[i+4][6] = -src[i].x*dst[i].y;
3060
a[i+4][7] = -src[i].y*dst[i].y;
3061
b[i] = dst[i].x;
3062
b[i+4] = dst[i].y;
3063
}
3064
3065
solve(A, B, X, solveMethod);
3066
M.ptr<double>()[8] = 1.;
3067
3068
return M;
3069
}
3070
3071
/* Calculates coefficients of affine transformation
3072
* which maps (xi,yi) to (ui,vi), (i=1,2,3):
3073
*
3074
* ui = c00*xi + c01*yi + c02
3075
*
3076
* vi = c10*xi + c11*yi + c12
3077
*
3078
* Coefficients are calculated by solving linear system:
3079
* / x0 y0 1 0 0 0 \ /c00\ /u0\
3080
* | x1 y1 1 0 0 0 | |c01| |u1|
3081
* | x2 y2 1 0 0 0 | |c02| |u2|
3082
* | 0 0 0 x0 y0 1 | |c10| |v0|
3083
* | 0 0 0 x1 y1 1 | |c11| |v1|
3084
* \ 0 0 0 x2 y2 1 / |c12| |v2|
3085
*
3086
* where:
3087
* cij - matrix coefficients
3088
*/
3089
3090
cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
3091
{
3092
Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.ptr());
3093
double a[6*6], b[6];
3094
Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
3095
3096
for( int i = 0; i < 3; i++ )
3097
{
3098
int j = i*12;
3099
int k = i*12+6;
3100
a[j] = a[k+3] = src[i].x;
3101
a[j+1] = a[k+4] = src[i].y;
3102
a[j+2] = a[k+5] = 1;
3103
a[j+3] = a[j+4] = a[j+5] = 0;
3104
a[k] = a[k+1] = a[k+2] = 0;
3105
b[i*2] = dst[i].x;
3106
b[i*2+1] = dst[i].y;
3107
}
3108
3109
solve( A, B, X );
3110
return M;
3111
}
3112
3113
// Inverts a 2x3 affine transform analytically: inverts the 2x2 linear part
// via its determinant and negates/transforms the translation column.
// Uses softfloat/softdouble arithmetic for bit-exact, platform-independent
// results. A singular matrix (D == 0) yields an all-zero inverse.
void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
{
    Mat matM = _matM.getMat();
    CV_Assert(matM.rows == 2 && matM.cols == 3);
    __iM.create(2, 3, matM.type());
    Mat _iM = __iM.getMat();

    if( matM.type() == CV_32F )
    {
        const softfloat* M = matM.ptr<softfloat>();
        softfloat* iM = _iM.ptr<softfloat>();
        int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));

        softdouble D = M[0]*M[step+1] - M[1]*M[step];
        D = D != 0. ? softdouble(1.)/D : softdouble(0.);
        softdouble A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
        softdouble b1 = -A11*M[2] - A12*M[step+2];
        softdouble b2 = -A21*M[2] - A22*M[step+2];

        iM[0] = A11; iM[1] = A12; iM[2] = b1;
        iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
    }
    else if( matM.type() == CV_64F )
    {
        const softdouble* M = matM.ptr<softdouble>();
        softdouble* iM = _iM.ptr<softdouble>();
        int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));

        softdouble D = M[0]*M[step+1] - M[1]*M[step];
        D = D != 0. ? softdouble(1.)/D : softdouble(0.);
        softdouble A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
        softdouble b1 = -A11*M[2] - A12*M[step+2];
        softdouble b2 = -A21*M[2] - A22*M[step+2];

        iM[0] = A11; iM[1] = A12; iM[2] = b1;
        iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
    }
    else
        CV_Error( CV_StsUnsupportedFormat, "" );
}
3153
3154
// InputArray overload: validates that both arrays hold exactly 4 CV_32F
// 2D points, then forwards to the Point2f[] implementation.
cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst, int solveMethod)
{
    Mat src = _src.getMat(), dst = _dst.getMat();
    CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
    return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data, solveMethod);
}
3160
3161
// InputArray overload: validates that both arrays hold exactly 3 CV_32F
// 2D points, then forwards to the Point2f[] implementation.
cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
{
    Mat src = _src.getMat(), dst = _dst.getMat();
    CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
    return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
}
3167
3168
// Legacy C API wrapper: maps CvArr/CvMat arguments onto cv::warpAffine.
// CV_WARP_FILL_OUTLIERS selects constant-border filling; otherwise pixels
// mapping outside the source are left untouched (transparent border).
CV_IMPL void
cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
              int flags, CvScalar fillval )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    cv::Mat matrix = cv::cvarrToMat(marr);
    CV_Assert( src.type() == dst.type() );
    cv::warpAffine( src, dst, matrix, dst.size(), flags,
        (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
        fillval );
}
3179
3180
// Legacy C API wrapper: maps CvArr/CvMat arguments onto cv::warpPerspective.
// Border policy mirrors cvWarpAffine (constant if CV_WARP_FILL_OUTLIERS,
// transparent otherwise).
CV_IMPL void
cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
                   int flags, CvScalar fillval )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    cv::Mat matrix = cv::cvarrToMat(marr);
    CV_Assert( src.type() == dst.type() );
    cv::warpPerspective( src, dst, matrix, dst.size(), flags,
        (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
        fillval );
}
3191
3192
// Legacy C API wrapper for cv::remap. The final assert verifies remap did not
// reallocate the destination, which would break the caller's CvArr view.
CV_IMPL void
cvRemap( const CvArr* srcarr, CvArr* dstarr,
         const CvArr* _mapx, const CvArr* _mapy,
         int flags, CvScalar fillval )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
    cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
    CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
    cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
        (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
        fillval );
    CV_Assert( dst0.data == dst.data );
}
3205
3206
3207
// Legacy C API wrapper: computes the rotation matrix with
// cv::getRotationMatrix2D and copies it into the caller-provided CvMat
// (converting to that matrix's element type).
CV_IMPL CvMat*
cv2DRotationMatrix( CvPoint2D32f center, double angle,
                    double scale, CvMat* matrix )
{
    cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
    CV_Assert( M.size() == M0.size() );
    M.convertTo(M0, M0.type());
    return matrix;
}
3216
3217
3218
// Legacy C API wrapper: computes the 3x3 perspective transform from 4 point
// pairs and writes it into the caller-provided CvMat.
CV_IMPL CvMat*
cvGetPerspectiveTransform( const CvPoint2D32f* src,
                           const CvPoint2D32f* dst,
                           CvMat* matrix )
{
    cv::Mat M0 = cv::cvarrToMat(matrix),
        M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
    CV_Assert( M.size() == M0.size() );
    M.convertTo(M0, M0.type());
    return matrix;
}
3229
3230
3231
// Legacy C API wrapper: computes the 2x3 affine transform from 3 point pairs
// and writes it into the caller-provided CvMat.
CV_IMPL CvMat*
cvGetAffineTransform( const CvPoint2D32f* src,
                      const CvPoint2D32f* dst,
                      CvMat* matrix )
{
    cv::Mat M0 = cv::cvarrToMat(matrix),
        M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
    CV_Assert( M.size() == M0.size() );
    M.convertTo(M0, M0.type());
    return matrix;
}
3242
3243
3244
// Legacy C API wrapper for cv::convertMaps. A CV_16SC1 second destination map
// is re-viewed as CV_16UC1 (same bytes) because convertMaps produces the
// interpolation-table indices as unsigned shorts.
CV_IMPL void
cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
{
    cv::Mat map1 = cv::cvarrToMat(arr1), map2;
    cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;

    if( arr2 )
        map2 = cv::cvarrToMat(arr2);
    if( dstarr2 )
    {
        dstmap2 = cv::cvarrToMat(dstarr2);
        if( dstmap2.type() == CV_16SC1 )
            dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.ptr(), dstmap2.step);
    }

    cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
}
3261
3262
/****************************************************************************************
3263
PkLab.net 2018 based on cv::linearPolar from OpenCV by J.L. Blanco, Apr 2009
3264
****************************************************************************************/
3265
void cv::warpPolar(InputArray _src, OutputArray _dst, Size dsize,
3266
Point2f center, double maxRadius, int flags)
3267
{
3268
// if dest size is empty given than calculate using proportional setting
3269
// thus we calculate needed angles to keep same area as bounding circle
3270
if ((dsize.width <= 0) && (dsize.height <= 0))
3271
{
3272
dsize.width = cvRound(maxRadius);
3273
dsize.height = cvRound(maxRadius * CV_PI);
3274
}
3275
else if (dsize.height <= 0)
3276
{
3277
dsize.height = cvRound(dsize.width * CV_PI);
3278
}
3279
3280
Mat mapx, mapy;
3281
mapx.create(dsize, CV_32F);
3282
mapy.create(dsize, CV_32F);
3283
bool semiLog = (flags & WARP_POLAR_LOG) != 0;
3284
3285
if (!(flags & CV_WARP_INVERSE_MAP))
3286
{
3287
CV_Assert(!dsize.empty());
3288
double Kangle = CV_2PI / dsize.height;
3289
int phi, rho;
3290
3291
// precalculate scaled rho
3292
Mat rhos = Mat(1, dsize.width, CV_32F);
3293
float* bufRhos = (float*)(rhos.data);
3294
if (semiLog)
3295
{
3296
double Kmag = std::log(maxRadius) / dsize.width;
3297
for (rho = 0; rho < dsize.width; rho++)
3298
bufRhos[rho] = (float)(std::exp(rho * Kmag) - 1.0);
3299
3300
}
3301
else
3302
{
3303
double Kmag = maxRadius / dsize.width;
3304
for (rho = 0; rho < dsize.width; rho++)
3305
bufRhos[rho] = (float)(rho * Kmag);
3306
}
3307
3308
for (phi = 0; phi < dsize.height; phi++)
3309
{
3310
double KKy = Kangle * phi;
3311
double cp = std::cos(KKy);
3312
double sp = std::sin(KKy);
3313
float* mx = (float*)(mapx.data + phi*mapx.step);
3314
float* my = (float*)(mapy.data + phi*mapy.step);
3315
3316
for (rho = 0; rho < dsize.width; rho++)
3317
{
3318
double x = bufRhos[rho] * cp + center.x;
3319
double y = bufRhos[rho] * sp + center.y;
3320
3321
mx[rho] = (float)x;
3322
my[rho] = (float)y;
3323
}
3324
}
3325
remap(_src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
3326
}
3327
else
3328
{
3329
const int ANGLE_BORDER = 1;
3330
cv::copyMakeBorder(_src, _dst, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
3331
Mat src = _dst.getMat();
3332
Size ssize = _dst.size();
3333
ssize.height -= 2 * ANGLE_BORDER;
3334
CV_Assert(!ssize.empty());
3335
const double Kangle = CV_2PI / ssize.height;
3336
double Kmag;
3337
if (semiLog)
3338
Kmag = std::log(maxRadius) / ssize.width;
3339
else
3340
Kmag = maxRadius / ssize.width;
3341
3342
int x, y;
3343
Mat bufx, bufy, bufp, bufa;
3344
3345
bufx = Mat(1, dsize.width, CV_32F);
3346
bufy = Mat(1, dsize.width, CV_32F);
3347
bufp = Mat(1, dsize.width, CV_32F);
3348
bufa = Mat(1, dsize.width, CV_32F);
3349
3350
for (x = 0; x < dsize.width; x++)
3351
bufx.at<float>(0, x) = (float)x - center.x;
3352
3353
for (y = 0; y < dsize.height; y++)
3354
{
3355
float* mx = (float*)(mapx.data + y*mapx.step);
3356
float* my = (float*)(mapy.data + y*mapy.step);
3357
3358
for (x = 0; x < dsize.width; x++)
3359
bufy.at<float>(0, x) = (float)y - center.y;
3360
3361
cartToPolar(bufx, bufy, bufp, bufa, 0);
3362
3363
if (semiLog)
3364
{
3365
bufp += 1.f;
3366
log(bufp, bufp);
3367
}
3368
3369
for (x = 0; x < dsize.width; x++)
3370
{
3371
double rho = bufp.at<float>(0, x) / Kmag;
3372
double phi = bufa.at<float>(0, x) / Kangle;
3373
mx[x] = (float)rho;
3374
my[x] = (float)phi + ANGLE_BORDER;
3375
}
3376
}
3377
remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX,
3378
(flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
3379
}
3380
}
3381
3382
void cv::linearPolar( InputArray _src, OutputArray _dst,
3383
Point2f center, double maxRadius, int flags )
3384
{
3385
warpPolar(_src, _dst, _src.size(), center, maxRadius, flags & ~WARP_POLAR_LOG);
3386
}
3387
3388
void cv::logPolar( InputArray _src, OutputArray _dst,
3389
Point2f center, double maxRadius, int flags )
3390
{
3391
Size ssize = _src.size();
3392
double M = maxRadius > 0 ? std::exp(ssize.width / maxRadius) : 1;
3393
warpPolar(_src, _dst, ssize, center, M, flags | WARP_POLAR_LOG);
3394
}
3395
3396
// Legacy C API wrapper: validates matching geometry/type, then calls
// cv::linearPolar.
CV_IMPL
void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
                    CvPoint2D32f center, double maxRadius, int flags )
{
    Mat src = cvarrToMat(srcarr);
    Mat dst = cvarrToMat(dstarr);

    CV_Assert(src.size == dst.size);
    CV_Assert(src.type() == dst.type());

    cv::linearPolar(src, dst, center, maxRadius, flags);
}
3408
3409
// Legacy C API wrapper: validates matching geometry/type, then calls
// cv::logPolar (M is the legacy magnitude-scale parameter).
CV_IMPL
void cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
                 CvPoint2D32f center, double M, int flags )
{
    Mat src = cvarrToMat(srcarr);
    Mat dst = cvarrToMat(dstarr);

    CV_Assert(src.size == dst.size);
    CV_Assert(src.type() == dst.type());

    cv::logPolar(src, dst, center, M, flags);
}
3421
3422
/* End of file. */
3423
3424