CoCalc -- deriv.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/imgproc/src/deriv.cpp
16354 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                        Intel License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Copyright (C) 2014, Itseez, Inc, all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of Intel Corporation may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42

43
#include "precomp.hpp"
44
#include "opencl_kernels_imgproc.hpp"
45

46
#include "opencv2/core/openvx/ovx_defs.hpp"
47
#include "filter.hpp"
48

49
/****************************************************************************************\
50
                             Sobel & Scharr Derivative Filters
51
\****************************************************************************************/
52

53
namespace cv
54
{
55

56
// Fills _kx and _ky with the 3-tap Scharr kernels for the requested derivative.
//   _kx, _ky  - output kernels, created here with 3 elements of type ktype
//   dx, dy    - derivative order per axis; exactly one of them must be 1, the other 0
//   normalize - when true, the smoothing kernel (order 0) is scaled by 1/32
//   ktype     - element type of the outputs, CV_32F or CV_64F
static void getScharrKernels( OutputArray _kx, OutputArray _ky,
                              int dx, int dy, bool normalize, int ktype )
{
    const int ksize = 3;

    CV_Assert( ktype == CV_32F || ktype == CV_64F );
    _kx.create(ksize, 1, ktype, -1, true);
    _ky.create(ksize, 1, ktype, -1, true);
    Mat kx = _kx.getMat();
    Mat ky = _ky.getMat();

    CV_Assert( dx >= 0 && dy >= 0 && dx+dy == 1 );

    Mat* const outputs[2] = { &kx, &ky };
    const int orders[2] = { dx, dy };

    for( int axis = 0; axis < 2; axis++ )
    {
        Mat& dstKernel = *outputs[axis];
        const int order = orders[axis];
        int taps[3];

        // The assertion above guarantees order is either 0 or 1 here.
        if( order == 0 )
        {
            // smoothing taps
            taps[0] = 3; taps[1] = 10; taps[2] = 3;
        }
        else if( order == 1 )
        {
            // differentiating taps
            taps[0] = -1; taps[1] = 0; taps[2] = 1;
        }

        // Wrap the integer taps with the shape of the created output, then convert
        // (and optionally scale) them into the destination type.
        Mat intKernel(dstKernel.rows, dstKernel.cols, CV_32S, &taps[0]);
        double scale = 1.;
        if( normalize && order != 1 )
            scale = 1./32;
        intKernel.convertTo(dstKernel, ktype, scale);
    }
}
85

86

87
// Fills _kx and _ky with the separable Sobel kernels for the requested derivative orders.
//   _kx, _ky  - output kernels, created here with type ktype
//   dx, dy    - derivative order along each axis (non-negative, at least one positive)
//   _ksize    - aperture size; must be odd and not larger than 31
//   normalize - when true, each kernel is scaled by 1/2^(ksize-order-1)
//   ktype     - element type of the outputs, CV_32F or CV_64F
static void getSobelKernels( OutputArray _kx, OutputArray _ky,
                             int dx, int dy, int _ksize, bool normalize, int ktype )
{
    // With an aperture of 1, an axis that is differentiated is widened to 3 taps.
    const int ksizeX = (_ksize == 1 && dx > 0) ? 3 : _ksize;
    const int ksizeY = (_ksize == 1 && dy > 0) ? 3 : _ksize;

    CV_Assert( ktype == CV_32F || ktype == CV_64F );

    _kx.create(ksizeX, 1, ktype, -1, true);
    _ky.create(ksizeY, 1, ktype, -1, true);
    Mat kx = _kx.getMat();
    Mat ky = _ky.getMat();

    if( _ksize % 2 == 0 || _ksize > 31 )
        CV_Error( CV_StsOutOfRange, "The kernel size must be odd and not larger than 31" );

    // One extra slot: the generation loops below touch index ksize.
    std::vector<int> kerI(std::max(ksizeX, ksizeY) + 1);

    CV_Assert( dx >= 0 && dy >= 0 && dx+dy > 0 );

    for( int axis = 0; axis < 2; axis++ )
    {
        Mat& dstKernel = axis == 0 ? kx : ky;
        const int order = axis == 0 ? dx : dy;
        const int ksize = axis == 0 ? ksizeX : ksizeY;

        CV_Assert( ksize > order );

        if( ksize == 1 )
        {
            kerI[0] = 1;
        }
        else if( ksize == 3 )
        {
            // Fixed 3-tap kernels for order 0, 1 and 2.
            switch( order )
            {
            case 0:
                kerI[0] = 1; kerI[1] = 2; kerI[2] = 1;
                break;
            case 1:
                kerI[0] = -1; kerI[1] = 0; kerI[2] = 1;
                break;
            default:
                kerI[0] = 1; kerI[1] = -2; kerI[2] = 1;
                break;
            }
        }
        else
        {
            // Start from the unit impulse [1, 0, 0, ...].
            kerI[0] = 1;
            for( int t = 1; t <= ksize; t++ )
                kerI[t] = 0;

            // Each pass replaces every entry with the sum of itself and its left
            // neighbour; ksize-order-1 such passes are applied.
            for( int pass = 0; pass < ksize - order - 1; pass++ )
            {
                int carry = kerI[0];
                for( int t = 1; t <= ksize; t++ )
                {
                    const int sum = kerI[t] + kerI[t-1];
                    kerI[t-1] = carry;
                    carry = sum;
                }
            }

            // Each pass replaces every entry with (left neighbour - itself);
            // one pass per derivative order.
            for( int pass = 0; pass < order; pass++ )
            {
                int carry = -kerI[0];
                for( int t = 1; t <= ksize; t++ )
                {
                    const int diff = kerI[t-1] - kerI[t];
                    kerI[t-1] = carry;
                    carry = diff;
                }
            }
        }

        // Convert the integer coefficients into the output kernel, scaling if requested.
        Mat intKernel(dstKernel.rows, dstKernel.cols, CV_32S, &kerI[0]);
        const double scale = normalize ? 1./(1 << (ksize-order-1)) : 1.;
        intKernel.convertTo(dstKernel, ktype, scale);
    }
}
163

164
}
165

166
// Produces the pair of derivative kernels for the given orders.
// A positive ksize selects the Sobel kernels; ksize <= 0 selects the Scharr kernels.
void cv::getDerivKernels( OutputArray kx, OutputArray ky, int dx, int dy,
                          int ksize, bool normalize, int ktype )
{
    if( ksize > 0 )
    {
        getSobelKernels( kx, ky, dx, dy, ksize, normalize, ktype );
        return;
    }

    getScharrKernels( kx, ky, dx, dy, normalize, ktype );
}
174

175

176
// Builds a separable linear filter engine from the (non-normalized, CV_32F)
// derivative kernels obtained from getDerivKernels for dx, dy and ksize.
cv::Ptr<cv::FilterEngine> cv::createDerivFilter(int srcType, int dstType,
                                                int dx, int dy, int ksize, int borderType )
{
    Mat kernelX, kernelY;
    getDerivKernels( kernelX, kernelY, dx, dy, ksize, false, CV_32F );

    const Point defaultAnchor(-1, -1);
    return createSeparableLinearFilter(srcType, dstType, kernelX, kernelY,
                                       defaultAnchor, 0, borderType );
}
184

185
#ifdef HAVE_OPENVX
186
namespace cv
187
{
188
    namespace ovx {
189
        template <> inline bool skipSmallImages<VX_KERNEL_SOBEL_3x3>(int w, int h) { return w*h < 320 * 240; }
190
    }
191
    // Attempts to compute a 3x3 Sobel derivative through OpenVX (vxuSobel3x3).
    // Returns false when the request is outside what this path accepts:
    // 8UC1 source, 16SC1 destination, ksize 3, scale 1, delta 0, a first-order
    // derivative along exactly one axis, and a constant border.
    // Returns true once the OpenVX call has been issued without raising.
    static bool openvx_sobel(InputArray _src, OutputArray _dst,
                             int dx, int dy, int ksize,
                             double scale, double delta, int borderType)
    {
        if (_src.type() != CV_8UC1 || _dst.type() != CV_16SC1)
            return false;
        if (ksize != 3 || scale != 1.0 || delta != 0.0)
            return false;
        if ((dx | dy) != 1 || (dx + dy) != 1)
            return false;
        if (_src.cols() < ksize || _src.rows() < ksize)
            return false;
        if (ovx::skipSmallImages<VX_KERNEL_SOBEL_3x3>(_src.cols(), _src.rows()))
            return false;

        Mat src = _src.getMat();
        Mat dst = _dst.getMat();

        //Process isolated borders only
        const bool isolated = (borderType & BORDER_ISOLATED) != 0;
        if (!isolated && src.isSubmatrix())
            return false;

        // Only BORDER_CONSTANT is accepted; BORDER_REPLICATE
        // (VX_BORDER_REPLICATE) is deliberately left disabled.
        if ((borderType & ~BORDER_ISOLATED) != BORDER_CONSTANT)
            return false;
        vx_enum border = VX_BORDER_CONSTANT;

        try
        {
            ivx::Context ctx = ovx::getOpenVXContext();
            //if ((vx_size)ksize > ctx.convolutionMaxDimension())
            //    return false;

            // When source and destination share the same buffer, work on a copy of the source.
            Mat input;
            if (dst.data == src.data)
                src.copyTo(input);
            else
                input = src;

            ivx::Image vxSrc = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
                    ivx::Image::createAddressing(input.cols, input.rows, 1, (vx_int32)(input.step)), input.data);
            ivx::Image vxDst = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_S16,
                    ivx::Image::createAddressing(dst.cols, dst.rows, 2, (vx_int32)(dst.step)), dst.data);

            //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
            //since OpenVX standard says nothing about thread-safety for now
            ivx::border_t savedBorder = ctx.immediateBorder();
            ctx.setImmediateBorder(border, (vx_uint8)(0));
            // The destination is passed as the x-gradient output when dx is set,
            // otherwise as the y-gradient output.
            if (dx)
                ivx::IVX_CHECK_STATUS(vxuSobel3x3(ctx, vxSrc, vxDst, NULL));
            else
                ivx::IVX_CHECK_STATUS(vxuSobel3x3(ctx, vxSrc, NULL, vxDst));
            ctx.setImmediateBorder(savedBorder);
        }
        catch (ivx::RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
        catch (ivx::WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }

        return true;
    }
260
}
261
#endif
262

263
#ifdef HAVE_IPP
264
namespace cv
265
{
266

267
// Attempts to compute a Sobel (ksize > 0) or Scharr (ksize <= 0) derivative with
// the IPP integration wrappers. Returns false when the configuration is not
// handled here or when IPP raises an IwException; returns true on success.
// Without HAVE_IPP_IW the function always returns false.
static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksize, double scale, double delta, int borderType)
{
#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP();

    ::ipp::IwiSize size(_src.size().width, _src.size().height);
    const IppDataType srcType  = ippiGetDataType(_src.depth());
    const IppDataType dstType  = ippiGetDataType(_dst.depth());
    const int         channels = _src.channels();

    // Only single-channel images with matching channel counts are processed.
    if(channels > 1 || channels != _dst.channels())
        return false;

    // A separate scaling pass is needed when scale/delta differ from the identity.
    bool needScale = fabs(delta) > FLT_EPSILON || fabs(scale-1) > FLT_EPSILON;

    // A non-positive aperture requests the Scharr filter, which uses a 3x3 mask.
    const bool scharr = ksize <= 0;
    if(scharr)
        ksize = 3;

    const IppiMaskSize maskSize = ippiGetMaskSize(ksize, ksize);
    if((int)maskSize < 0)
        return false;

#if IPP_VERSION_X100 <= 201703
    // Bug with mirror wrap
    if(borderType == BORDER_REFLECT_101 && (ksize/2+1 > size.width || ksize/2+1 > size.height))
        return false;
#endif

    const IwiDerivativeType derivType = ippiGetDerivType(dx, dy, !scharr);
    if((int)derivType < 0)
        return false;

    // Acquire data and begin processing
    try
    {
        Mat src = _src.getMat();
        Mat dst = _dst.getMat();
        ::ipp::IwiImage iwSrc     = ippiGetImage(src);
        ::ipp::IwiImage iwDst     = ippiGetImage(dst);
        ::ipp::IwiImage iwSrcProc = iwSrc;
        ::ipp::IwiImage iwDstProc = iwDst;
        ::ipp::IwiBorderSize borderSize(maskSize);
        ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
        if(!ippBorder)
            return false;

        const bool srcIs8u = srcType == ipp8u;
        if(srcIs8u && dstType == ipp8u)
        {
            // 8u -> 8u: filter into a 16s intermediate and convert back in the scaling pass.
            iwDstProc.Alloc(iwDst.m_size, ipp16s, channels);
            needScale = true;
        }
        else if(srcIs8u && dstType == ipp32f)
        {
            // 8u -> 32f: convert the source (with the border region included) to 32f first.
            iwSrc -= borderSize;
            iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0, ::ipp::IwiScaleParams(ippAlgHintFast));
            iwSrcProc += borderSize;
        }

        if(scharr)
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
        else
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);

        if(needScale)
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta, ::ipp::IwiScaleParams(ippAlgHintFast));
    }
    catch (const ::ipp::IwException &)
    {
        return false;
    }

    return true;
#else
    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(dx); CV_UNUSED(dy); CV_UNUSED(ksize); CV_UNUSED(scale); CV_UNUSED(delta); CV_UNUSED(borderType);
    return false;
#endif
}
351
}
352
#endif
353

354
#ifdef HAVE_OPENCL
355
namespace cv
356
{
357
// Runs the "sepFilter3x3_8UC1_cols16_rows2" OpenCL kernel on an 8UC1 image.
// Returns false (so the caller can fall back to another implementation) unless:
//   - the default device is an Intel device,
//   - source type is CV_8UC1 and ddepth is CV_8U,
//   - the source has zero offset, a step divisible by 4, a width divisible by 16
//     and a height divisible by 2,
//   - both kernels have an odd number of elements,
//   - borderType is one of the border modes named in borderMap below,
//   - the kernel builds and the destination has zero offset and a step divisible by 4.
// Otherwise returns the result of running the kernel.
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
                                  InputArray _kernelX, InputArray _kernelY, double delta, int borderType)
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), cn = CV_MAT_CN(type);

    if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
         (_src.offset() == 0) && (_src.step() % 4 == 0) &&
         (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
        return false;

    // borderType indexes this table directly. Reject anything that is out of range
    // (e.g. values carrying the BORDER_ISOLATED flag) or that has no entry
    // (slot 3, BORDER_WRAP) instead of reading past the array or formatting a null string.
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
    const int borderCount = (int)(sizeof(borderMap) / sizeof(borderMap[0]));
    if (borderType < 0 || borderType >= borderCount || borderMap[borderType] == 0)
        return false;

    Mat kernelX = _kernelX.getMat().reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = _kernelY.getMat().reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;

    // Each work item covers a 16x2 block of pixels (kernel name: cols16_rows2).
    Size size = _src.size();
    size_t globalsize[2] = { (size_t)(size.width / 16), (size_t)(size.height / 2) };

    // cv::format sizes its own buffer, so long kernel strings cannot overflow.
    const String build_opts = cv::format("-D %s %s%s", borderMap[borderType],
            ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(),
            ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str());

    ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts);
    if (kernel.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(size, CV_MAKETYPE(ddepth, cn));
    if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
        return false;
    UMat dst = _dst.getUMat();

    int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
    idxArg = kernel.set(idxArg, (int)src.step);
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    idxArg = kernel.set(idxArg, (int)dst.step);
    idxArg = kernel.set(idxArg, (int)dst.rows);
    idxArg = kernel.set(idxArg, (int)dst.cols);
    idxArg = kernel.set(idxArg, static_cast<float>(delta));

    // No explicit local work size is requested.
    return kernel.run(2, globalsize, NULL, false);
}
411
}
412
#endif
413

414
// Computes an image derivative with the Sobel operator.
//   _src, _dst - input image and output image (created here with the source size)
//   ddepth     - output depth; a negative value means "same as the source"
//   dx, dy     - derivative orders
//   ksize      - aperture size forwarded to getDerivKernels
//   scale      - multiplier folded into one of the two separable kernels
//   delta      - value added to the filtered result
//   borderType - border extrapolation mode
// Accelerated back ends (OpenCL, HAL, OpenVX, IPP) are tried in order before the
// generic sepFilter2D implementation.
void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
                int ksize, double scale, double delta, int borderType )
{
    CV_INSTRUMENT_REGION();

    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    if (ddepth < 0)
        ddepth = sdepth;
    int dtype = CV_MAKE_TYPE(ddepth, cn);
    _dst.create( _src.size(), dtype );

    // Kernel type is at least CV_32F, or wider when source/destination depth is wider.
    int ktype = std::max(CV_32F, std::max(ddepth, sdepth));

    Mat kx, ky;
    getDerivKernels( kx, ky, dx, dy, ksize, false, ktype );
    if( scale != 1 )
    {
        // usually the smoothing part is the slowest to compute,
        // so try to scale it instead of the faster differentiating part
        if( dx == 0 )
            kx *= scale;
        else
            ky *= scale;
    }

    CV_OCL_RUN(ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 && ksize == 3 &&
               (size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
               ocl_sepFilter3x3_8UC1(_src, _dst, ddepth, kx, ky, delta, borderType));

    // The generic OpenCL path must honour delta just like the CPU path below does,
    // and rows are compared against the vertical kernel (ky), as in the guard above.
    CV_OCL_RUN(ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 &&
               (size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
               ocl_sepFilter2D(_src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType))

    Mat src = _src.getMat();
    Mat dst = _dst.getMat();

    // Unless the border is isolated, locate the ROI inside its parent image so the
    // HAL call can be told how many pixels surround it on each side.
    Point ofs;
    Size wsz(src.cols, src.rows);
    if(!(borderType & BORDER_ISOLATED))
        src.locateROI( wsz, ofs );

    CALL_HAL(sobel, cv_hal_sobel, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn,
             ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, dx, dy, ksize, scale, delta, borderType&~BORDER_ISOLATED);

    CV_OVX_RUN(true,
               openvx_sobel(src, dst, dx, dy, ksize, scale, delta, borderType))

    CV_IPP_RUN_FAST(ipp_Deriv(src, dst, dx, dy, ksize, scale, delta, borderType));

    // Generic fallback; scale is already folded into kx/ky.
    sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
}
464

465

466
// Computes an image derivative with the Scharr operator.
//   _src, _dst - input image and output image (created here with the source size)
//   ddepth     - output depth; a negative value means "same as the source"
//   dx, dy     - derivative orders (validated by getScharrKernels)
//   scale      - multiplier folded into one of the two separable kernels
//   delta      - value added to the filtered result
//   borderType - border extrapolation mode
// Accelerated back ends (OpenCL, HAL, IPP) are tried in order before the generic
// sepFilter2D implementation.
void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
                 double scale, double delta, int borderType )
{
    CV_INSTRUMENT_REGION();

    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    if (ddepth < 0)
        ddepth = sdepth;
    int dtype = CV_MAKETYPE(ddepth, cn);
    _dst.create( _src.size(), dtype );

    // Kernel type is at least CV_32F, or wider when source/destination depth is wider.
    int ktype = std::max(CV_32F, std::max(ddepth, sdepth));

    Mat kx, ky;
    getScharrKernels( kx, ky, dx, dy, false, ktype );
    if( scale != 1 )
    {
        // usually the smoothing part is the slowest to compute,
        // so try to scale it instead of the faster differentiating part
        if( dx == 0 )
            kx *= scale;
        else
            ky *= scale;
    }

    CV_OCL_RUN(ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 &&
               (size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
               ocl_sepFilter3x3_8UC1(_src, _dst, ddepth, kx, ky, delta, borderType));

    // The generic OpenCL path must honour delta just like the CPU path below does,
    // and rows are compared against the vertical kernel (ky), as in the guard above.
    CV_OCL_RUN(ocl::isOpenCLActivated() && _dst.isUMat() && _src.dims() <= 2 &&
               (size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
               ocl_sepFilter2D(_src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType))

    Mat src = _src.getMat();
    Mat dst = _dst.getMat();

    // Unless the border is isolated, locate the ROI inside its parent image so the
    // HAL call can be told how many pixels surround it on each side.
    Point ofs;
    Size wsz(src.cols, src.rows);
    if(!(borderType & BORDER_ISOLATED))
        src.locateROI( wsz, ofs );

    CALL_HAL(scharr, cv_hal_scharr, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn,
             ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, dx, dy, scale, delta, borderType&~BORDER_ISOLATED);

    // ksize 0 selects the Scharr variant inside ipp_Deriv.
    CV_IPP_RUN_FAST(ipp_Deriv(src, dst, dx, dy, 0, scale, delta, borderType));

    // Generic fallback; scale is already folded into kx/ky.
    sepFilter2D( src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
}
514

515
#ifdef HAVE_OPENCL
516

517
namespace cv {
518

519
// Evaluates (tileX + 2*(ksize/2)) * (3*tileY + 2*(ksize/2)) * elsize.
// It is compared against the device's local memory size when choosing tile sizes.
// Every argument is parenthesized so that expression arguments (e.g. "a + b")
// are evaluated as a unit.
#define LAPLACIAN_LOCAL_MEM(tileX, tileY, ksize, elsize) \
    (((tileX) + 2 * (int)((ksize) / 2)) * (3 * (tileY) + 2 * (int)((ksize) / 2)) * (elsize))
520

521
// OpenCL implementation of the Laplacian built from the separable kernels kd and ks.
// Two strategies are used:
//   1. When the offset, border mode, image size and device limits allow it, the
//      single "laplacian" kernel is built and run directly on the source.
//   2. Otherwise two sepFilter2D passes (kd/ks and ks/kd) are computed and combined
//      by the "sumConvert" kernel, applying scale and delta.
// Returns false when the device lacks required double support or a kernel fails to
// build; otherwise returns the result of running the kernel.
static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
                           const Mat & kd, const Mat & ks, double scale, double delta,
                           int borderType, int depth, int ddepth)
{
    const size_t tileSizeX = 16;
    const size_t tileSizeYmin = 8;

    const ocl::Device dev = ocl::Device::getDefault();

    const int stype = _src.type();
    const int sdepth = CV_MAT_DEPTH(stype);
    const int cn = CV_MAT_CN(stype);
    const int esz = CV_ELEM_SIZE(stype);

    const bool doubleSupport = dev.doubleFPConfig() > 0;
    if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
        return false;

    // Both kernels are viewed as single rows and must have an odd, equal length.
    Mat kernelX = kd.reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = ks.reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;
    CV_Assert(kernelX.cols == kernelY.cols);

    const size_t wgs = dev.maxWorkGroupSize();
    const size_t lmsz = dev.localMemSize();
    const size_t src_step = _src.step();
    const size_t src_offset = _src.offset();
    const size_t tileSizeYmax = wgs / tileSizeX;
    CV_Assert(src_step != 0 && esz != 0);

    // workaround for NVIDIA: 3 channel vector type takes 4*elem_size in local memory
    const int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;

    // Conditions for the direct single-kernel path.
    const bool alignedOffset = (src_offset % src_step) % esz == 0;
    const bool simpleBorder = borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE;
    const bool mirroredBorder = borderType == BORDER_REFLECT || borderType == BORDER_WRAP ||
                                borderType == BORDER_REFLECT_101;
    const bool largeEnough = _src.cols() >= (int) (kernelX.cols + tileSizeX) &&
                             _src.rows() >= (int) (kernelY.cols + tileSizeYmax);
    const bool fitsDevice = (tileSizeX * tileSizeYmin <= wgs) &&
                            (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeYmin, kernelX.cols, loc_mem_cn * 4) <= lmsz);

    if (alignedOffset &&
        (simpleBorder || (mirroredBorder && largeEnough)) &&
        fitsDevice
        && OCL_PERFORMANCE_CHECK(!dev.isAMD())  // TODO FIXIT 2018: Problem with AMDGPU on Linux (2482.3)
       )
    {
        Size size = _src.size(), wholeSize;
        Point origin;
        const int dtype = CV_MAKE_TYPE(ddepth, cn);
        const int wdepth = CV_32F;

        // Start from the tallest tile and halve it until it fits the work-group
        // size and the local memory budget.
        size_t tileSizeY = tileSizeYmax;
        for (;;)
        {
            const bool tooManyItems = tileSizeX * tileSizeY > wgs;
            const bool tooMuchMemory = LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeY, kernelX.cols, loc_mem_cn * 4) > lmsz;
            if (!tooManyItems && !tooMuchMemory)
                break;
            tileSizeY /= 2;
        }
        size_t lt2[2] = { tileSizeX, tileSizeY};
        // Global width is the image width rounded up to a whole number of tiles;
        // a single tile is launched in the second dimension.
        size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1] };

        char cvt[2][40];
        const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                           "BORDER_REFLECT_101" };

        String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s"
                                 " -D convertToWT=%s -D convertToDT=%s"
                                 " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s"
                                 " -D srcT=%s -D dstT=%s -D WT=%s"
                                 " -D CN=%d ",
                                 (int)lt2[0], (int)lt2[1], kernelX.cols / 2,
                                 ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(),
                                 ocl::kernelToStr(kernelY, wdepth, "KERNEL_MATRIX_Y").c_str(),
                                 ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
                                 ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
                                 borderMap[borderType],
                                 ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth),
                                 ocl::typeToStr(CV_MAKETYPE(sdepth, cn)),
                                 ocl::typeToStr(CV_MAKETYPE(ddepth, cn)),
                                 ocl::typeToStr(CV_MAKETYPE(wdepth, cn)),
                                 cn);

        ocl::Kernel k("laplacian", ocl::imgproc::laplacian5_oclsrc, opts);
        if (k.empty())
            return false;
        UMat src = _src.getUMat();
        _dst.create(size, dtype);
        UMat dst = _dst.getUMat();

        // Split the byte offset of the ROI into element column and row.
        const int src_offset_x = static_cast<int>((src_offset % src_step) / esz);
        const int src_offset_y = static_cast<int>(src_offset / src_step);

        src.locateROI(wholeSize, origin);

        k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y,
               wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst),
               static_cast<float>(scale), static_cast<float>(delta));

        return k.run(2, gt2, lt2, false);
    }

    // Fallback: two separable passes combined by the "sumConvert" kernel.
    // Integer coefficients are used when scale and delta are (numerically) integers.
    const int intScale = cvRound(scale), intDelta = cvRound(delta);
    const bool fractionalCoeff = std::fabs(delta - intDelta) > DBL_EPSILON ||
                                 std::fabs(scale - intScale) > DBL_EPSILON;
    const int wdepth = std::max(depth, fractionalCoeff ? CV_32F : CV_32S);
    const int kercn = 1;

    if (wdepth == CV_64F && !doubleSupport)
        return false;

    char cvt[2][40];
    const String sumOpts = format("-D ONLY_SUM_CONVERT "
                                  "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
                                  "-D convertToWT=%s -D convertToDT=%s%s",
                                  ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                                  ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
                                  ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                                  ocl::typeToStr(wdepth), wdepth,
                                  ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                                  ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
                                  doubleSupport ? " -D DOUBLE_SUPPORT" : "");
    ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc, sumOpts);
    if (k.empty())
        return false;

    // The two passes use the kernels in swapped roles.
    UMat d2x, d2y;
    sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
    sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);

    UMat dst = _dst.getUMat();

    ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x);
    ocl::KernelArg d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y);
    ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    if (wdepth >= CV_32F)
        k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
    else
        k.args(d2xarg, d2yarg, dstarg, intScale, intDelta);

    size_t globalsize[] = { (size_t)dst.cols * cn / kercn, (size_t)dst.rows };
    return k.run(2, globalsize, NULL, false);
}
656

657
// Runs the "laplacian3_8UC1_cols16_rows2" OpenCL kernel on an 8UC1 image.
// Returns false (so the caller can fall back to another implementation) unless:
//   - the default device is an Intel device,
//   - source type is CV_8UC1 and ddepth is CV_8U,
//   - borderType is one of the border modes named in borderMap below,
//   - the source has zero offset, a step divisible by 4, a width divisible by 16
//     and a height divisible by 2,
//   - the kernel builds and the destination has zero offset and a step divisible by 4.
// Otherwise returns the result of running the kernel.
static bool ocl_Laplacian3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
                                InputArray _kernel, double delta, int borderType)
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), cn = CV_MAT_CN(type);

    // borderType indexes this table directly. Reject anything that is out of range
    // (e.g. values carrying the BORDER_ISOLATED flag) or that has no entry
    // (slot 3, BORDER_WRAP) instead of reading past the array or formatting a null string.
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
    const int borderCount = (int)(sizeof(borderMap) / sizeof(borderMap[0]));
    if (borderType < 0 || borderType >= borderCount || borderMap[borderType] == 0)
        return false;

    if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
         (_src.offset() == 0) && (_src.step() % 4 == 0) &&
         (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
        return false;

    Mat kernel = _kernel.getMat().reshape(1, 1);

    // Each work item covers a 16x2 block of pixels (kernel name: cols16_rows2).
    Size size = _src.size();
    size_t globalsize[2] = { (size_t)(size.width / 16), (size_t)(size.height / 2) };

    // cv::format sizes its own buffer, so long kernel strings cannot overflow.
    const String build_opts = cv::format("-D %s %s", borderMap[borderType],
            ocl::kernelToStr(kernel, CV_32F, "KERNEL_MATRIX").c_str());

    ocl::Kernel k("laplacian3_8UC1_cols16_rows2", cv::ocl::imgproc::laplacian3_oclsrc, build_opts);
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(size, CV_MAKETYPE(ddepth, cn));
    if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
        return false;
    UMat dst = _dst.getUMat();

    int idxArg = k.set(0, ocl::KernelArg::PtrReadOnly(src));
    idxArg = k.set(idxArg, (int)src.step);
    idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    idxArg = k.set(idxArg, (int)dst.step);
    idxArg = k.set(idxArg, (int)dst.rows);
    idxArg = k.set(idxArg, (int)dst.cols);
    idxArg = k.set(idxArg, static_cast<float>(delta));

    // No explicit local work size is requested.
    return k.run(2, globalsize, NULL, false);
}
706

707
}
708
#endif
709

710
#if defined(HAVE_IPP)
711
namespace cv
712
{
713

714
// Attempts to compute the Laplacian with the IPP integration wrappers.
// Returns false when the configuration is not handled here or when IPP raises
// an IwException; returns true on success.
// Without HAVE_IPP_IW the function always returns false.
static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double scale, double delta, int borderType)
{
#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP();

    ::ipp::IwiSize size(_src.size().width, _src.size().height);
    const IppDataType srcType  = ippiGetDataType(_src.depth());
    const IppDataType dstType  = ippiGetDataType(_dst.depth());
    const int         channels = _src.channels();

    // Only single-channel images with matching channel counts are processed.
    if(channels > 1 || channels != _dst.channels())
        return false;

    // A separate scaling pass is needed when scale/delta differ from the identity.
    bool needScale = fabs(delta) > FLT_EPSILON || fabs(scale-1) > FLT_EPSILON;

    const IppiMaskSize maskSize = ippiGetMaskSize(ksize, ksize);
    if((int)maskSize < 0)
        return false;

    // Acquire data and begin processing
    try
    {
        Mat src = _src.getMat();
        Mat dst = _dst.getMat();
        ::ipp::IwiImage iwSrc     = ippiGetImage(src);
        ::ipp::IwiImage iwDst     = ippiGetImage(dst);
        ::ipp::IwiImage iwSrcProc = iwSrc;
        ::ipp::IwiImage iwDstProc = iwDst;
        ::ipp::IwiBorderSize borderSize(maskSize);
        ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
        if(!ippBorder)
            return false;

        const bool srcIs8u = srcType == ipp8u;
        if(srcIs8u && dstType == ipp8u)
        {
            // 8u -> 8u: filter into a 16s intermediate and convert back in the scaling pass.
            iwDstProc.Alloc(iwDst.m_size, ipp16s, channels);
            needScale = true;
        }
        else if(srcIs8u && dstType == ipp32f)
        {
            // 8u -> 32f: convert the source (with the border region included) to 32f first.
            iwSrc -= borderSize;
            iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0);
            iwSrcProc += borderSize;
        }

        CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, iwSrcProc, iwDstProc, maskSize, ::ipp::IwDefault(), ippBorder);

        if(needScale)
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta);

    }
    catch (const ::ipp::IwException &)
    {
        return false;
    }

    return true;
#else
    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(scale); CV_UNUSED(delta); CV_UNUSED(borderType);
    return false;
#endif
}
779
}
780
#endif
781

782

783
// Computes the Laplacian of _src and stores it in _dst.
//
//   _src       - input array; must not be empty.
//   _dst       - output array, created here with the size of _src, the same
//                number of channels and depth `ddepth`.
//   ddepth     - requested output depth; a negative value selects the depth of _src.
//   ksize      - aperture size. 1 and 3 use a fixed 3x3 kernel applied with
//                filter2D; any other value uses the second-derivative kernels
//                returned by getSobelKernels() and sums the x and y responses.
//   scale      - multiplier applied to the computed response.
//   delta      - value added to the scaled response.
//   borderType - border extrapolation mode; BORDER_ISOLATED may be OR-ed in to
//                keep the filter from reading outside an ROI.
//
// NOTE(review): CV_OCL_RUN / CV_IPP_RUN are defined elsewhere; they are expected
// to return from this function when the accelerated implementation succeeds.
void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
                    double scale, double delta, int borderType )
{
    CV_INSTRUMENT_REGION();

    // Reject empty input up front: the stripe-size computation in the generic
    // branch divides by src.cols and would otherwise divide by zero.
    CV_Assert( !_src.empty() );

    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    if (ddepth < 0)
        ddepth = sdepth;
    _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) );

    const bool smallAperture = ksize == 1 || ksize == 3;

    // Fixed 3x3 kernels: row 0 is used for ksize == 1, row 1 for ksize == 3.
    // Built once and shared by the OpenCL attempt and the CPU fallback.
    float K[2][9] =
    {
        { 0, 1, 0, 1, -4, 1, 0, 1, 0 },
        { 2, 0, 2, 0, -8, 0, 2, 0, 2 }
    };
    Mat kernel;

    if( smallAperture )
    {
        kernel = Mat(3, 3, CV_32F, K[ksize == 3]);
        if( scale != 1 )
            kernel *= scale; // fold the scale into the kernel coefficients

        CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
                   ocl_Laplacian3_8UC1(_src, _dst, ddepth, kernel, delta, borderType));
    }

    CV_IPP_RUN(!(cv::ocl::isOpenCLActivated() && _dst.isUMat()), ipp_Laplacian(_src, _dst, ksize, scale, delta, borderType));

    if( smallAperture )
    {
        filter2D( _src, _dst, ddepth, kernel, Point(-1, -1), delta, borderType );
    }
    else
    {
        // Kernel type is at least CV_32F; the intermediate (work) depth is 16-bit
        // signed for 8-bit input with small apertures, otherwise float or double.
        int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
        int wdepth = sdepth == CV_8U && ksize <= 5 ? CV_16S : sdepth <= CV_32F ? CV_32F : CV_64F;
        int wtype = CV_MAKETYPE(wdepth, cn);
        Mat kd, ks;
        getSobelKernels( kd, ks, 2, 0, ksize, false, ktype );

        CV_OCL_RUN(_dst.isUMat(),
                   ocl_Laplacian5(_src, _dst, kd, ks, scale,
                                  delta, borderType, wdepth, ddepth))

        Mat src = _src.getMat(), dst = _dst.getMat();
        Point ofs;
        Size wsz(src.cols, src.rows);
        if(!(borderType&BORDER_ISOLATED))
            src.locateROI( wsz, ofs );
        borderType = (borderType&~BORDER_ISOLATED);

        const size_t STRIPE_SIZE = 1 << 14;
        // fx filters with (kd, ks), fy with (ks, kd): the second derivative is
        // taken along one axis and the companion kernel along the other.
        Ptr<FilterEngine> fx = createSeparableLinearFilter(stype,
            wtype, kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() );
        Ptr<FilterEngine> fy = createSeparableLinearFilter(stype,
            wtype, ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() );

        int y = fx->start(src, wsz, ofs), dsty = 0, dy = 0;
        fy->start(src, wsz, ofs);
        const uchar* sptr = src.ptr() + src.step[0] * y;

        // Number of source rows fed to the filters per iteration: about
        // STRIPE_SIZE bytes worth of rows, clamped to [1, src.rows].
        int dy0 = std::min(std::max((int)(STRIPE_SIZE/(CV_ELEM_SIZE(stype)*src.cols)), 1), src.rows);
        Mat d2x( dy0 + kd.rows - 1, src.cols, wtype );
        Mat d2y( dy0 + kd.rows - 1, src.cols, wtype );

        for( ; dsty < src.rows; sptr += dy0*src.step, dsty += dy )
        {
            fx->proceed( sptr, (int)src.step, dy0, d2x.ptr(), (int)d2x.step );
            dy = fy->proceed( sptr, (int)src.step, dy0, d2y.ptr(), (int)d2y.step );
            if( dy > 0 )
            {
                Mat dstripe = dst.rowRange(dsty, dsty + dy);
                // Only the first dy rows of the work buffers were produced in
                // this pass; operate on views of them instead of patching the
                // buffer headers.
                Mat sum = d2x.rowRange(0, dy);
                sum += d2y.rowRange(0, dy);
                sum.convertTo( dstripe, ddepth, scale, delta );
            }
        }
    }
}
871

872
/////////////////////////////////////////////////////////////////////////////////////////
873

874
// Legacy C API entry point: forwards to cv::Sobel with unit scale, zero delta
// and replicated borders, writing into the caller-provided destination array.
CV_IMPL void
cvSobel( const void* srcarr, void* dstarr, int dx, int dy, int aperture_size )
{
    cv::Mat src = cv::cvarrToMat(srcarr);
    cv::Mat dst = cv::cvarrToMat(dstarr);

    CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() );

    cv::Sobel( src, dst, dst.depth(), dx, dy, aperture_size, 1, 0, cv::BORDER_REPLICATE );

    // For an IplImage source with a non-zero origin field, an odd-order
    // derivative in y has its sign flipped.
    const bool oddYOrder = dy % 2 != 0;
    if( oddYOrder && CV_IS_IMAGE(srcarr) )
    {
        const IplImage* image = static_cast<const IplImage*>(srcarr);
        if( image->origin )
            dst *= -1;
    }
}
885

886

887
// Legacy C API entry point: forwards to cv::Laplacian with unit scale, zero
// delta and replicated borders; the output depth is taken from the destination.
CV_IMPL void
cvLaplace( const void* srcarr, void* dstarr, int aperture_size )
{
    cv::Mat src = cv::cvarrToMat(srcarr);
    cv::Mat dst = cv::cvarrToMat(dstarr);

    CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() );

    const int ddepth = dst.depth();
    const double scale = 1;
    const double delta = 0;
    cv::Laplacian( src, dst, ddepth, aperture_size, scale, delta, cv::BORDER_REPLICATE );
}
896

897
/* End of file. */
898

899
Product

Resources

Company