CoCalc -- canny.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/imgproc/src/canny.cpp
¹⁶³⁵⁴ views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                        Intel License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Copyright (C) 2014, Itseez Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of Intel Corporation may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42

43
#include "precomp.hpp"
44
#include "opencl_kernels_imgproc.hpp"
45
#include "opencv2/core/hal/intrin.hpp"
46
#include <deque>
47

48
#include "opencv2/core/openvx/ovx_defs.hpp"
49

50
#if CV_SIMD128
51
#define CV_MALLOC_SIMD128 16
52
#endif
53

54
namespace cv
55
{
56

57
#ifdef HAVE_IPP
58
static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, float low,  float high, bool L2gradient, int aperture_size)
59
{
60
#ifdef HAVE_IPP_IW
61
    CV_INSTRUMENT_REGION_IPP();
62

63
#if IPP_DISABLE_PERF_CANNY_MT
64
    if(cv::getNumThreads()>1)
65
        return false;
66
#endif
67

68
    ::ipp::IwiSize size(dst.cols, dst.rows);
69
    IppDataType    type     = ippiGetDataType(dst.depth());
70
    int            channels = dst.channels();
71
    IppNormType    norm     = (L2gradient)?ippNormL2:ippNormL1;
72

73
    if(size.width <= 3 || size.height <= 3)
74
        return false;
75

76
    if(channels != 1)
77
        return false;
78

79
    if(type != ipp8u)
80
        return false;
81

82
    if(src.empty())
83
    {
84
        try
85
        {
86
            ::ipp::IwiImage iwSrcDx;
87
            ::ipp::IwiImage iwSrcDy;
88
            ::ipp::IwiImage iwDst;
89

90
            ippiGetImage(dx_, iwSrcDx);
91
            ippiGetImage(dy_, iwSrcDy);
92
            ippiGetImage(dst, iwDst);
93

94
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
95
        }
96
        catch (const ::ipp::IwException &)
97
        {
98
            return false;
99
        }
100
    }
101
    else
102
    {
103
        IppiMaskSize kernel;
104

105
        if(aperture_size == 3)
106
            kernel = ippMskSize3x3;
107
        else if(aperture_size == 5)
108
            kernel = ippMskSize5x5;
109
        else
110
            return false;
111

112
        try
113
        {
114
            ::ipp::IwiImage iwSrc;
115
            ::ipp::IwiImage iwDst;
116

117
            ippiGetImage(src, iwSrc);
118
            ippiGetImage(dst, iwDst);
119

120
            CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
121
        }
122
        catch (const ::ipp::IwException &)
123
        {
124
            return false;
125
        }
126
    }
127

128
    return true;
129
#else
130
    CV_UNUSED(src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(dst); CV_UNUSED(low); CV_UNUSED(high); CV_UNUSED(L2gradient); CV_UNUSED(aperture_size);
131
    return false;
132
#endif
133
}
134
#endif
135

136
#ifdef HAVE_OPENCL
137

138
template <bool useCustomDeriv>
139
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
140
                      int aperture_size, bool L2gradient, int cn, const Size & size)
141
{
142
    CV_INSTRUMENT_REGION_OPENCL();
143

144
    UMat map;
145

146
    const ocl::Device &dev = ocl::Device::getDefault();
147
    int max_wg_size = (int)dev.maxWorkGroupSize();
148

149
    int lSizeX = 32;
150
    int lSizeY = max_wg_size / 32;
151

152
    if (lSizeY == 0)
153
    {
154
        lSizeX = 16;
155
        lSizeY = max_wg_size / 16;
156
    }
157
    if (lSizeY == 0)
158
    {
159
        lSizeY = 1;
160
    }
161

162
    if (aperture_size == 7)
163
    {
164
        low_thresh = low_thresh / 16.0f;
165
        high_thresh = high_thresh / 16.0f;
166
    }
167

168
    if (L2gradient)
169
    {
170
        low_thresh = std::min(32767.0f, low_thresh);
171
        high_thresh = std::min(32767.0f, high_thresh);
172

173
        if (low_thresh > 0)
174
            low_thresh *= low_thresh;
175
        if (high_thresh > 0)
176
            high_thresh *= high_thresh;
177
    }
178
    int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
179

180
    if (!useCustomDeriv &&
181
        aperture_size == 3 && !_src.isSubmatrix())
182
    {
183
        /*
184
            stage1_with_sobel:
185
                Sobel operator
186
                Calc magnitudes
187
                Non maxima suppression
188
                Double thresholding
189
        */
190
        char cvt[40];
191
        ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
192
                               format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
193
                                      cn, ocl::memopTypeToStr(_src.depth()),
194
                                      ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
195
                                      ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
196
                                      lSizeX, lSizeY,
197
                                      L2gradient ? " -D L2GRAD" : ""));
198
        if (with_sobel.empty())
199
            return false;
200

201
        UMat src = _src.getUMat();
202
        map.create(size, CV_32S);
203
        with_sobel.args(ocl::KernelArg::ReadOnly(src),
204
                        ocl::KernelArg::WriteOnlyNoSize(map),
205
                        (float) low, (float) high);
206

207
        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
208
                localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
209

210
        if (!with_sobel.run(2, globalsize, localsize, false))
211
            return false;
212
    }
213
    else
214
    {
215
        /*
216
            stage1_without_sobel:
217
                Calc magnitudes
218
                Non maxima suppression
219
                Double thresholding
220
        */
221
        double scale = 1.0;
222
        if (aperture_size == 7)
223
        {
224
            scale = 1 / 16.0;
225
        }
226

227
        UMat dx, dy;
228
        if (!useCustomDeriv)
229
        {
230
            Sobel(_src, dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
231
            Sobel(_src, dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
232
        }
233
        else
234
        {
235
            dx = dx_;
236
            dy = dy_;
237
        }
238

239
        ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
240
                                    format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
241
                                           cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
242
        if (without_sobel.empty())
243
            return false;
244

245
        map.create(size, CV_32S);
246
        without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
247
                           ocl::KernelArg::WriteOnly(map),
248
                           low, high);
249

250
        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
251
                localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
252

253
        if (!without_sobel.run(2, globalsize, localsize, false))
254
            return false;
255
    }
256

257
    int PIX_PER_WI = 8;
258
    /*
259
        stage2:
260
            hysteresis (add weak edges if they are connected with strong edges)
261
    */
262

263
    int sizey = lSizeY / PIX_PER_WI;
264
    if (sizey == 0)
265
        sizey = 1;
266

267
    size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };
268

269
    ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
270
                                format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
271
                                PIX_PER_WI, lSizeX, sizey));
272

273
    if (edgesHysteresis.empty())
274
        return false;
275

276
    edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
277
    if (!edgesHysteresis.run(2, globalsize, localsize, false))
278
        return false;
279

280
    // get edges
281

282
    ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
283
                                format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
284
    if (getEdgesKernel.empty())
285
        return false;
286

287
    _dst.create(size, CV_8UC1);
288
    UMat dst = _dst.getUMat();
289

290
    getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));
291

292
    return getEdgesKernel.run(2, globalsize, NULL, false);
293
}
294

295
#endif
296

297
// Marks the map cell pointed to by `map` as a definite edge (value 2) and appends the
// pointer to `stack` so hysteresis can later grow the edge from it.
// `map` is evaluated twice: pass an expression without side effects.
#define CANNY_PUSH(map, stack) ((*(map) = 2), (stack).push_back(map))

// Double-threshold classification of a pixel that survived non-maxima suppression:
// magnitude above `high` -> definite edge (pushed), otherwise -> 0 ("might belong to an edge").
// Wrapped in do { } while (0) so it behaves as a single statement in any context.
#define CANNY_CHECK_SIMD(m, high, map, stack) \
    do { \
        if ((m) > (high)) \
            CANNY_PUSH(map, stack); \
        else \
            *(map) = 0; \
    } while (0)

// Same classification as CANNY_CHECK_SIMD, followed by `continue` for the enclosing loop.
// It is intentionally NOT wrapped in do { } while (0): the `continue` has to target the
// caller's loop. Use it only as a full statement inside a braced block within a loop.
#define CANNY_CHECK(m, high, map, stack) \
    if ((m) > (high)) \
        CANNY_PUSH(map, stack); \
    else \
        *(map) = 0; \
    continue
311

312
class parallelCanny : public ParallelLoopBody
313
{
314
public:
315
    parallelCanny(const Mat &_src, Mat &_map, std::deque<uchar*> &borderPeaksParallel,
316
                  int _low, int _high, int _aperture_size, bool _L2gradient) :
317
        src(_src), src2(_src), map(_map), _borderPeaksParallel(borderPeaksParallel),
318
        low(_low), high(_high), aperture_size(_aperture_size), L2gradient(_L2gradient)
319
    {
320
#if CV_SIMD128
321
        haveSIMD = hasSIMD128();
322
        if(haveSIMD)
323
            _map.create(src.rows + 2, (int)alignSize((size_t)(src.cols + CV_MALLOC_SIMD128 + 1), CV_MALLOC_SIMD128), CV_8UC1);
324
        else
325
#endif
326
            _map.create(src.rows + 2, src.cols + 2,  CV_8UC1);
327
        map = _map;
328
        map.row(0).setTo(1);
329
        map.row(src.rows + 1).setTo(1);
330
        mapstep = map.cols;
331
        needGradient = true;
332
        cn = src.channels();
333
    }
334

335
    parallelCanny(const Mat &_dx, const Mat &_dy, Mat &_map, std::deque<uchar*> &borderPeaksParallel,
336
                  int _low, int _high, bool _L2gradient) :
337
        src(_dx), src2(_dy), map(_map), _borderPeaksParallel(borderPeaksParallel),
338
        low(_low), high(_high), aperture_size(0), L2gradient(_L2gradient)
339
    {
340
#if CV_SIMD128
341
        haveSIMD = hasSIMD128();
342
        if(haveSIMD)
343
            _map.create(src.rows + 2, (int)alignSize((size_t)(src.cols + CV_MALLOC_SIMD128 + 1), CV_MALLOC_SIMD128), CV_8UC1);
344
        else
345
#endif
346
            _map.create(src.rows + 2, src.cols + 2,  CV_8UC1);
347
        map = _map;
348
        map.row(0).setTo(1);
349
        map.row(src.rows + 1).setTo(1);
350
        mapstep = map.cols;
351
        needGradient = false;
352
        cn = src.channels();
353
    }
354

355
    ~parallelCanny() {}
356

357
    parallelCanny& operator=(const parallelCanny&) { return *this; }
358

359
    void operator()(const Range &boundaries) const CV_OVERRIDE
360
    {
361
        CV_TRACE_FUNCTION();
362

363
        Mat dx, dy;
364
        AutoBuffer<short> dxMax(0), dyMax(0);
365
        std::deque<uchar*> stack, borderPeaksLocal;
366
        const int rowStart = max(0, boundaries.start - 1), rowEnd = min(src.rows, boundaries.end + 1);
367
        int *_mag_p, *_mag_a, *_mag_n;
368
        short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL;
369
        uchar *_pmap;
370
        double scale = 1.0;
371

372
        CV_TRACE_REGION("gradient")
373
        if(needGradient)
374
        {
375
            if (aperture_size == 7)
376
            {
377
                scale = 1 / 16.0;
378
            }
379
            Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
380
            Sobel(src.rowRange(rowStart, rowEnd), dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
381
        }
382
        else
383
        {
384
            dx = src.rowRange(rowStart, rowEnd);
385
            dy = src2.rowRange(rowStart, rowEnd);
386
        }
387

388
        CV_TRACE_REGION_NEXT("magnitude");
389
        if(cn > 1)
390
        {
391
            dxMax.allocate(2 * dx.cols);
392
            dyMax.allocate(2 * dy.cols);
393
            _dx_a = dxMax.data();
394
            _dx_n = _dx_a + dx.cols;
395
            _dy_a = dyMax.data();
396
            _dy_n = _dy_a + dy.cols;
397
        }
398

399
        // _mag_p: previous row, _mag_a: actual row, _mag_n: next row
400
#if CV_SIMD128
401
        AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128));
402
        _mag_p = alignPtr(buffer.data() + 1, CV_MALLOC_SIMD128);
403
        _mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128);
404
        _mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128);
405
#else
406
        AutoBuffer<int> buffer(3 * (mapstep * cn));
407
        _mag_p = buffer.data() + 1;
408
        _mag_a = _mag_p + mapstep * cn;
409
        _mag_n = _mag_a + mapstep * cn;
410
#endif
411

412
        // For the first time when just 2 rows are filled and for left and right borders
413
        if(rowStart == boundaries.start)
414
            memset(_mag_n - 1, 0, mapstep * sizeof(int));
415
        else
416
            _mag_n[src.cols] = _mag_n[-1] = 0;
417

418
        _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0;
419

420
        // calculate magnitude and angle of gradient, perform non-maxima suppression.
421
        // fill the map with one of the following values:
422
        //   0 - the pixel might belong to an edge
423
        //   1 - the pixel can not belong to an edge
424
        //   2 - the pixel does belong to an edge
425
        for (int i = rowStart; i <= boundaries.end; ++i)
426
        {
427
            // Scroll the ring buffer
428
            std::swap(_mag_n, _mag_a);
429
            std::swap(_mag_n, _mag_p);
430

431
            if(i < rowEnd)
432
            {
433
                // Next row calculation
434
                _dx = dx.ptr<short>(i - rowStart);
435
                _dy = dy.ptr<short>(i - rowStart);
436

437
                if (L2gradient)
438
                {
439
                    int j = 0, width = src.cols * cn;
440
#if CV_SIMD128
441
                    if (haveSIMD)
442
                    {
443
                       for ( ; j <= width - 8; j += 8)
444
                        {
445
                            v_int16x8 v_dx = v_load((const short*)(_dx + j));
446
                            v_int16x8 v_dy = v_load((const short*)(_dy + j));
447

448
                            v_int32x4 v_dxp_low, v_dxp_high;
449
                            v_int32x4 v_dyp_low, v_dyp_high;
450
                            v_expand(v_dx, v_dxp_low, v_dxp_high);
451
                            v_expand(v_dy, v_dyp_low, v_dyp_high);
452

453
                            v_store_aligned((int *)(_mag_n + j), v_dxp_low*v_dxp_low+v_dyp_low*v_dyp_low);
454
                            v_store_aligned((int *)(_mag_n + j + 4), v_dxp_high*v_dxp_high+v_dyp_high*v_dyp_high);
455
                        }
456
                    }
457
#endif
458
                    for ( ; j < width; ++j)
459
                        _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
460
                }
461
                else
462
                {
463
                    int j = 0, width = src.cols * cn;
464
#if CV_SIMD128
465
                    if (haveSIMD)
466
                    {
467
                        for(; j <= width - 8; j += 8)
468
                        {
469
                            v_int16x8 v_dx = v_load((const short *)(_dx + j));
470
                            v_int16x8 v_dy = v_load((const short *)(_dy + j));
471

472
                            v_dx = v_reinterpret_as_s16(v_abs(v_dx));
473
                            v_dy = v_reinterpret_as_s16(v_abs(v_dy));
474

475
                            v_int32x4 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh;
476
                            v_expand(v_dx, v_dx_ml, v_dx_mh);
477
                            v_expand(v_dy, v_dy_ml, v_dy_mh);
478

479
                            v_store_aligned((int *)(_mag_n + j), v_dx_ml + v_dy_ml);
480
                            v_store_aligned((int *)(_mag_n + j + 4), v_dx_mh + v_dy_mh);
481
                        }
482
                    }
483
#endif
484
                    for ( ; j < width; ++j)
485
                        _mag_n[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
486
                }
487

488
                if(cn > 1)
489
                {
490
                    std::swap(_dx_n, _dx_a);
491
                    std::swap(_dy_n, _dy_a);
492

493
                    for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
494
                    {
495
                        int maxIdx = jn;
496
                        for(int k = 1; k < cn; ++k)
497
                            if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k;
498

499
                        _mag_n[j] = _mag_n[maxIdx];
500
                        _dx_n[j] = _dx[maxIdx];
501
                        _dy_n[j] = _dy[maxIdx];
502
                    }
503

504
                    _mag_n[src.cols] = 0;
505
                }
506

507
                // at the very beginning we do not have a complete ring
508
                // buffer of 3 magnitude rows for non-maxima suppression
509
                if (i <= boundaries.start)
510
                    continue;
511
            }
512
            else
513
            {
514
                memset(_mag_n - 1, 0, mapstep * sizeof(int));
515

516
                if(cn > 1)
517
                {
518
                    std::swap(_dx_n, _dx_a);
519
                    std::swap(_dy_n, _dy_a);
520
                }
521
            }
522

523
            // From here actual src row is (i - 1)
524
            // Set left and right border to 1
525
#if CV_SIMD128
526
            if(haveSIMD)
527
                _pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128;
528
            else
529
#endif
530
                _pmap = map.ptr<uchar>(i) + 1;
531

532
            _pmap[src.cols] =_pmap[-1] = 1;
533

534
            if(cn == 1)
535
            {
536
                _dx = dx.ptr<short>(i - rowStart - 1);
537
                _dy = dy.ptr<short>(i - rowStart - 1);
538
            }
539
            else
540
            {
541
                _dx = _dx_a;
542
                _dy = _dy_a;
543
            }
544

545
            const int TG22 = 13573;
546
            int j = 0;
547
#if CV_SIMD128
548
            if (haveSIMD)
549
            {
550
                const v_int32x4 v_low = v_setall_s32(low);
551
                const v_int8x16 v_one = v_setall_s8(1);
552

553
                for (; j <= src.cols - 32; j += 32)
554
                {
555
                    v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
556
                    v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
557
                    v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
558
                    v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));
559

560
                    v_int32x4 v_cmp1 = v_m1 > v_low;
561
                    v_int32x4 v_cmp2 = v_m2 > v_low;
562
                    v_int32x4 v_cmp3 = v_m3 > v_low;
563
                    v_int32x4 v_cmp4 = v_m4 > v_low;
564

565
                    v_m1 = v_load_aligned((const int*)(_mag_a + j + 16));
566
                    v_m2 = v_load_aligned((const int*)(_mag_a + j + 20));
567
                    v_m3 = v_load_aligned((const int*)(_mag_a + j + 24));
568
                    v_m4 = v_load_aligned((const int*)(_mag_a + j + 28));
569

570
                    v_store_aligned((signed char*)(_pmap + j), v_one);
571
                    v_store_aligned((signed char*)(_pmap + j + 16), v_one);
572

573
                    v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
574
                    v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);
575

576
                    v_cmp1 = v_m1 > v_low;
577
                    v_cmp2 = v_m2 > v_low;
578
                    v_cmp3 = v_m3 > v_low;
579
                    v_cmp4 = v_m4 > v_low;
580

581
                    v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
582

583
                    v_cmp80 = v_pack(v_cmp1, v_cmp2);
584
                    v_cmp81 = v_pack(v_cmp3, v_cmp4);
585

586
                    unsigned int mask = v_signmask(v_cmp);
587

588
                    v_cmp = v_pack(v_cmp80, v_cmp81);
589
                    mask |= v_signmask(v_cmp) << 16;
590

591
                    if (mask)
592
                    {
593
                        int k = j;
594

595
                        do
596
                        {
597
                            int l = trailingZeros32(mask);
598
                            k += l;
599
                            mask >>= l;
600

601
                            int m = _mag_a[k];
602
                            short xs = _dx[k];
603
                            short ys = _dy[k];
604
                            int x = (int)std::abs(xs);
605
                            int y = (int)std::abs(ys) << 15;
606

607
                            int tg22x = x * TG22;
608

609
                            if (y < tg22x)
610
                            {
611
                                if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
612
                                {
613
                                    CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
614
                                }
615
                            }
616
                            else
617
                            {
618
                                int tg67x = tg22x + (x << 16);
619
                                if (y > tg67x)
620
                                {
621
                                    if (m > _mag_p[k] && m >= _mag_n[k])
622
                                    {
623
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
624
                                    }
625
                                }
626
                                else
627
                                {
628
                                    int s = (xs ^ ys) < 0 ? -1 : 1;
629
                                    if(m > _mag_p[k - s] && m > _mag_n[k + s])
630
                                    {
631
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
632
                                    }
633
                                }
634
                            }
635
                            ++k;
636
                        } while((mask >>= 1));
637
                    }
638
                }
639

640
                if (j <= src.cols - 16)
641
                {
642
                    v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
643
                    v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
644
                    v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
645
                    v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));
646

647
                    v_store_aligned((signed char*)(_pmap + j), v_one);
648

649
                    v_int32x4 v_cmp1 = v_m1 > v_low;
650
                    v_int32x4 v_cmp2 = v_m2 > v_low;
651
                    v_int32x4 v_cmp3 = v_m3 > v_low;
652
                    v_int32x4 v_cmp4 = v_m4 > v_low;
653

654
                    v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
655
                    v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);
656

657
                    v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
658
                    unsigned int mask = v_signmask(v_cmp);
659

660
                    if (mask)
661
                    {
662
                        int k = j;
663

664
                        do
665
                        {
666
                            int l = trailingZeros32(mask);
667
                            k += l;
668
                            mask >>= l;
669

670
                            int m = _mag_a[k];
671
                            short xs = _dx[k];
672
                            short ys = _dy[k];
673
                            int x = (int)std::abs(xs);
674
                            int y = (int)std::abs(ys) << 15;
675

676
                            int tg22x = x * TG22;
677

678
                            if (y < tg22x)
679
                            {
680
                                if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
681
                                {
682
                                    CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
683
                                }
684
                            }
685
                            else
686
                            {
687
                                int tg67x = tg22x + (x << 16);
688
                                if (y > tg67x)
689
                                {
690
                                    if (m > _mag_p[k] && m >= _mag_n[k])
691
                                    {
692
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
693
                                    }
694
                                }
695
                                else
696
                                {
697
                                    int s = (xs ^ ys) < 0 ? -1 : 1;
698
                                    if(m > _mag_p[k - s] && m > _mag_n[k + s])
699
                                    {
700
                                        CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
701
                                    }
702
                                }
703
                            }
704
                            ++k;
705
                        } while((mask >>= 1));
706
                    }
707
                    j += 16;
708
                }
709
            }
710
#endif
711
            for (; j < src.cols; j++)
712
            {
713
                int m = _mag_a[j];
714

715
                if (m > low)
716
                {
717
                    short xs = _dx[j];
718
                    short ys = _dy[j];
719
                    int x = (int)std::abs(xs);
720
                    int y = (int)std::abs(ys) << 15;
721

722
                    int tg22x = x * TG22;
723

724
                    if (y < tg22x)
725
                    {
726
                        if (m > _mag_a[j - 1] && m >= _mag_a[j + 1])
727
                        {
728
                            CANNY_CHECK(m, high, (_pmap+j), stack);
729
                        }
730
                    }
731
                    else
732
                    {
733
                        int tg67x = tg22x + (x << 16);
734
                        if (y > tg67x)
735
                        {
736
                            if (m > _mag_p[j] && m >= _mag_n[j])
737
                            {
738
                                CANNY_CHECK(m, high, (_pmap+j), stack);
739
                            }
740
                        }
741
                        else
742
                        {
743
                            int s = (xs ^ ys) < 0 ? -1 : 1;
744
                            if(m > _mag_p[j - s] && m > _mag_n[j + s])
745
                            {
746
                                CANNY_CHECK(m, high, (_pmap+j), stack);
747
                            }
748
                        }
749
                    }
750
                }
751
                _pmap[j] = 1;
752
            }
753
        }
754

755
        // Not for first row of first slice or last row of last slice
756
        uchar *pmapLower = (rowStart == 0) ? map.data : (map.data + (boundaries.start + 2) * mapstep);
757
        uint pmapDiff = (uint)(((rowEnd == src.rows) ? map.datalimit : (map.data + boundaries.end * mapstep)) - pmapLower);
758

759
        // now track the edges (hysteresis thresholding)
760
        CV_TRACE_REGION_NEXT("hysteresis");
761
        while (!stack.empty())
762
        {
763
            uchar *m = stack.back();
764
            stack.pop_back();
765

766
            // Stops thresholding from expanding to other slices by sending pixels in the borders of each
767
            // slice in a queue to be serially processed later.
768
            if((unsigned)(m - pmapLower) < pmapDiff)
769
            {
770
                if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
771
                if (!m[-mapstep])   CANNY_PUSH((m-mapstep), stack);
772
                if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
773
                if (!m[-1])         CANNY_PUSH((m-1), stack);
774
                if (!m[1])          CANNY_PUSH((m+1), stack);
775
                if (!m[mapstep-1])  CANNY_PUSH((m+mapstep-1), stack);
776
                if (!m[mapstep])    CANNY_PUSH((m+mapstep), stack);
777
                if (!m[mapstep+1])  CANNY_PUSH((m+mapstep+1), stack);
778
            }
779
            else
780
            {
781
                borderPeaksLocal.push_back(m);
782
                ptrdiff_t mapstep2 = m < pmapLower ? mapstep : -mapstep;
783

784
                if (!m[-1])         CANNY_PUSH((m-1), stack);
785
                if (!m[1])          CANNY_PUSH((m+1), stack);
786
                if (!m[mapstep2-1]) CANNY_PUSH((m+mapstep2-1), stack);
787
                if (!m[mapstep2])   CANNY_PUSH((m+mapstep2), stack);
788
                if (!m[mapstep2+1]) CANNY_PUSH((m+mapstep2+1), stack);
789
            }
790
        }
791

792
        if(!borderPeaksLocal.empty())
793
        {
794
            AutoLock lock(mutex);
795
            _borderPeaksParallel.insert(_borderPeaksParallel.end(), borderPeaksLocal.begin(), borderPeaksLocal.end());
796
        }
797
    }
798

799
private:
800
    const Mat &src, &src2;
801
    Mat &map;
802
    std::deque<uchar*> &_borderPeaksParallel;
803
    int low, high, aperture_size;
804
    bool L2gradient, needGradient;
805
    ptrdiff_t mapstep;
806
    int cn;
807
#if CV_SIMD128
808
    bool haveSIMD;
809
#endif
810
    mutable Mutex mutex;
811
};
812

813
class finalPass : public ParallelLoopBody
814
{
815

816
public:
817
    finalPass(const Mat &_map, Mat &_dst) :
818
        map(_map), dst(_dst)
819
    {
820
        dst = _dst;
821
#if CV_SIMD128
822
        haveSIMD = hasSIMD128();
823
#endif
824
    }
825

826
    ~finalPass() {}
827

828
    void operator()(const Range &boundaries) const CV_OVERRIDE
829
    {
830
        // the final pass, form the final image
831
        for (int i = boundaries.start; i < boundaries.end; i++)
832
        {
833
            int j = 0;
834
            uchar *pdst = dst.ptr<uchar>(i);
835
            const uchar *pmap = map.ptr<uchar>(i + 1);
836
#if CV_SIMD128
837
            if(haveSIMD)
838
                pmap += CV_MALLOC_SIMD128;
839
            else
840
#endif
841
                pmap += 1;
842
#if CV_SIMD128
843
            if(haveSIMD) {
844
                const v_uint8x16 v_zero = v_setzero_u8();
845
                const v_uint8x16 v_ff = ~v_zero;
846
                const v_uint8x16 v_two(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
847

848
                for (; j <= dst.cols - 16; j += 16)
849
                {
850
                    v_uint8x16 v_pmap = v_load_aligned((const unsigned char*)(pmap + j));
851
                    v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
852
                    v_store((pdst + j), v_pmap);
853
                }
854

855
                if (j <= dst.cols - 8)
856
                {
857
                    v_uint8x16 v_pmap = v_load_low((const unsigned char*)(pmap + j));
858
                    v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
859
                    v_store_low((pdst + j), v_pmap);
860
                    j += 8;
861
                }
862
            }
863
#endif
864
            for (; j < dst.cols; j++)
865
            {
866
                pdst[j] = (uchar)-(pmap[j] >> 1);
867
            }
868
        }
869
    }
870

871
private:
872
    const Mat &map;
873
    Mat &dst;
874
#if CV_SIMD128
875
    bool haveSIMD;
876
#endif
877

878
    finalPass(const finalPass&); // = delete
879
    finalPass& operator=(const finalPass&); // = delete
880
};
881

882
#ifdef HAVE_OPENVX
namespace ovx {
    // Images with fewer than 640 * 480 pixels are reported as "small" for the OpenVX Canny kernel.
    template <> inline bool skipSmallImages<VX_KERNEL_CANNY_EDGE_DETECTOR>(int w, int h)
    {
        return w*h < 640 * 480;
    }
}

/**
 * Runs Canny edge detection through an OpenVX graph.
 *
 * @param src    input image; its buffer is wrapped as an OpenVX image without copying
 * @param dst    output image; its buffer is wrapped as an OpenVX image without copying
 * @param loVal  lower hysteresis threshold (saturated to uchar)
 * @param hiVal  upper hysteresis threshold (saturated to uchar)
 * @param kSize  gradient kernel size handed to the OpenVX node
 * @param useL2  selects VX_NORM_L2 when true, VX_NORM_L1 otherwise
 * @return true once the graph has been processed; OpenVX wrapper errors are
 *         routed through VX_DbgThrow (its behaviour is defined elsewhere).
 */
static bool openvx_canny(const Mat& src, Mat& dst, int loVal, int hiVal, int kSize, bool useL2)
{
    using namespace ivx;

    Context context = ovx::getOpenVXContext();
    try
    {
        Image vxSrc = Image::createFromHandle(
                    context,
                    Image::matTypeToFormat(src.type()),
                    Image::createAddressing(src),
                    src.data );
        Image vxDst = Image::createFromHandle(
                    context,
                    Image::matTypeToFormat(dst.type()),
                    Image::createAddressing(dst),
                    dst.data );

        const uchar loClamped = saturate_cast<uchar>(loVal);
        const uchar hiClamped = saturate_cast<uchar>(hiVal);
        Threshold hysteresis = Threshold::createRange(context, VX_TYPE_UINT8, loClamped, hiClamped);

#if 0
        // the code below is disabled because vxuCannyEdgeDetector()
        // ignores context attribute VX_CONTEXT_IMMEDIATE_BORDER

        // FIXME: may fail in multithread case
        border_t prevBorder = context.immediateBorder();
        context.setImmediateBorder(VX_BORDER_REPLICATE);
        IVX_CHECK_STATUS( vxuCannyEdgeDetector(context, vxSrc, hysteresis, kSize, (useL2 ? VX_NORM_L2 : VX_NORM_L1), vxDst) );
        context.setImmediateBorder(prevBorder);
#else
        // alternative code without vxuCannyEdgeDetector():
        // build a one-node graph so the border mode can be set on the node itself
        Graph graph = Graph::create(context);
        ivx::Node cannyNode = ivx::Node(
                    vxCannyEdgeDetectorNode(graph, vxSrc, hysteresis, kSize, (useL2 ? VX_NORM_L2 : VX_NORM_L1), vxDst) );
        cannyNode.setBorder(VX_BORDER_REPLICATE);
        graph.verify();
        graph.process();
#endif

#ifdef VX_VERSION_1_1
        vxSrc.swapHandle();
        vxDst.swapHandle();
#endif
    }
    catch(const WrapperError& e)
    {
        VX_DbgThrow(e.what());
    }
    catch(const RuntimeError& e)
    {
        VX_DbgThrow(e.what());
    }

    return true;
}
#endif // HAVE_OPENVX
940

941
void Canny( InputArray _src, OutputArray _dst,
942
                double low_thresh, double high_thresh,
943
                int aperture_size, bool L2gradient )
944
{
945
    CV_INSTRUMENT_REGION();
946

947
    CV_Assert( _src.depth() == CV_8U );
948

949
    const Size size = _src.size();
950

951
    // we don't support inplace parameters in case with RGB/BGR src
952
    CV_Assert((_dst.getObj() != _src.getObj() || _src.type() == CV_8UC1) && "Inplace parameters are not supported");
953

954
    _dst.create(size, CV_8U);
955

956
    if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
957
    {
958
        // backward compatibility
959
        aperture_size &= ~CV_CANNY_L2_GRADIENT;
960
        L2gradient = true;
961
    }
962

963
    if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
964
        CV_Error(CV_StsBadFlag, "Aperture size should be odd between 3 and 7");
965

966
    if (aperture_size == 7)
967
    {
968
        low_thresh = low_thresh / 16.0;
969
        high_thresh = high_thresh / 16.0;
970
    }
971

972
    if (low_thresh > high_thresh)
973
        std::swap(low_thresh, high_thresh);
974

975
    CV_OCL_RUN(_dst.isUMat() && (_src.channels() == 1 || _src.channels() == 3),
976
               ocl_Canny<false>(_src, UMat(), UMat(), _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, _src.channels(), size))
977

978
    Mat src0 = _src.getMat(), dst = _dst.getMat();
979
    Mat src(src0.size(), src0.type(), src0.data, src0.step);
980

981
    CALL_HAL(canny, cv_hal_canny, src.data, src.step, dst.data, dst.step, src.cols, src.rows, src.channels(),
982
             low_thresh, high_thresh, aperture_size, L2gradient);
983

984
    CV_OVX_RUN(
985
        false && /* disabling due to accuracy issues */
986
            src.type() == CV_8UC1 &&
987
            !src.isSubmatrix() &&
988
            src.cols >= aperture_size &&
989
            src.rows >= aperture_size &&
990
            !ovx::skipSmallImages<VX_KERNEL_CANNY_EDGE_DETECTOR>(src.cols, src.rows),
991
        openvx_canny(
992
            src,
993
            dst,
994
            cvFloor(low_thresh),
995
            cvFloor(high_thresh),
996
            aperture_size,
997
            L2gradient ) )
998

999
    CV_IPP_RUN_FAST(ipp_Canny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh, L2gradient, aperture_size))
1000

1001
    if (L2gradient)
1002
    {
1003
        low_thresh = std::min(32767.0, low_thresh);
1004
        high_thresh = std::min(32767.0, high_thresh);
1005

1006
        if (low_thresh > 0) low_thresh *= low_thresh;
1007
        if (high_thresh > 0) high_thresh *= high_thresh;
1008
    }
1009
    int low = cvFloor(low_thresh);
1010
    int high = cvFloor(high_thresh);
1011

1012
    // If Scharr filter: aperture size is 3, ksize2 is 1
1013
    int ksize2 = aperture_size < 0 ? 1 : aperture_size / 2;
1014
    // Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead
1015
    int numOfThreads = std::max(1, std::min(getNumThreads(), getNumberOfCPUs()));
1016
    // Make a fallback for pictures with too few rows.
1017
    int grainSize = src.rows / numOfThreads;
1018
    int minGrainSize = 2 * (ksize2 + 1);
1019
    if (grainSize < minGrainSize)
1020
        numOfThreads = std::max(1, src.rows / minGrainSize);
1021

1022
    Mat map;
1023
    std::deque<uchar*> stack;
1024

1025
    parallel_for_(Range(0, src.rows), parallelCanny(src, map, stack, low, high, aperture_size, L2gradient), numOfThreads);
1026

1027
    CV_TRACE_REGION("global_hysteresis");
1028
    // now track the edges (hysteresis thresholding)
1029
    ptrdiff_t mapstep = map.cols;
1030

1031
    while (!stack.empty())
1032
    {
1033
        uchar* m = stack.back();
1034
        stack.pop_back();
1035

1036
        if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
1037
        if (!m[-mapstep])   CANNY_PUSH((m-mapstep), stack);
1038
        if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
1039
        if (!m[-1])         CANNY_PUSH((m-1), stack);
1040
        if (!m[1])          CANNY_PUSH((m+1), stack);
1041
        if (!m[mapstep-1])  CANNY_PUSH((m+mapstep-1), stack);
1042
        if (!m[mapstep])    CANNY_PUSH((m+mapstep), stack);
1043
        if (!m[mapstep+1])  CANNY_PUSH((m+mapstep+1), stack);
1044
    }
1045

1046
    CV_TRACE_REGION_NEXT("finalPass");
1047
    parallel_for_(Range(0, src.rows), finalPass(map, dst), src.total()/(double)(1<<16));
1048
}
1049

1050
void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
1051
                double low_thresh, double high_thresh,
1052
                bool L2gradient )
1053
{
1054
    CV_INSTRUMENT_REGION();
1055

1056
    CV_Assert(_dx.dims() == 2);
1057
    CV_Assert(_dx.type() == CV_16SC1 || _dx.type() == CV_16SC3);
1058
    CV_Assert(_dy.type() == _dx.type());
1059
    CV_Assert(_dx.sameSize(_dy));
1060

1061
    if (low_thresh > high_thresh)
1062
        std::swap(low_thresh, high_thresh);
1063

1064
    const Size size = _dx.size();
1065

1066
    CV_OCL_RUN(_dst.isUMat(),
1067
               ocl_Canny<true>(UMat(), _dx.getUMat(), _dy.getUMat(), _dst, (float)low_thresh, (float)high_thresh, 0, L2gradient, _dx.channels(), size))
1068

1069
    _dst.create(size, CV_8U);
1070
    Mat dst = _dst.getMat();
1071

1072
    Mat dx = _dx.getMat();
1073
    Mat dy = _dy.getMat();
1074

1075
    CV_IPP_RUN_FAST(ipp_Canny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh, L2gradient, 0))
1076

1077
    if (L2gradient)
1078
    {
1079
        low_thresh = std::min(32767.0, low_thresh);
1080
        high_thresh = std::min(32767.0, high_thresh);
1081

1082
        if (low_thresh > 0) low_thresh *= low_thresh;
1083
        if (high_thresh > 0) high_thresh *= high_thresh;
1084
    }
1085

1086
    int low = cvFloor(low_thresh);
1087
    int high = cvFloor(high_thresh);
1088

1089
    std::deque<uchar*> stack;
1090
    Mat map;
1091

1092
    // Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead
1093
    int numOfThreads = std::max(1, std::min(getNumThreads(), getNumberOfCPUs()));
1094
    if (dx.rows / numOfThreads < 3)
1095
        numOfThreads = std::max(1, dx.rows / 3);
1096

1097
    parallel_for_(Range(0, dx.rows), parallelCanny(dx, dy, map, stack, low, high, L2gradient), numOfThreads);
1098

1099
    CV_TRACE_REGION("global_hysteresis")
1100
    // now track the edges (hysteresis thresholding)
1101
    ptrdiff_t mapstep = map.cols;
1102

1103
    while (!stack.empty())
1104
    {
1105
        uchar* m = stack.back();
1106
        stack.pop_back();
1107

1108
        if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
1109
        if (!m[-mapstep])   CANNY_PUSH((m-mapstep), stack);
1110
        if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
1111
        if (!m[-1])         CANNY_PUSH((m-1), stack);
1112
        if (!m[1])          CANNY_PUSH((m+1), stack);
1113
        if (!m[mapstep-1])  CANNY_PUSH((m+mapstep-1), stack);
1114
        if (!m[mapstep])    CANNY_PUSH((m+mapstep), stack);
1115
        if (!m[mapstep+1])  CANNY_PUSH((m+mapstep+1), stack);
1116
    }
1117

1118
    CV_TRACE_REGION_NEXT("finalPass");
1119
    parallel_for_(Range(0, dx.rows), finalPass(map, dst), dx.total()/(double)(1<<16));
1120
}
1121

1122
} // namespace cv
1123

1124
void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
1125
              double threshold2, int aperture_size )
1126
{
1127
    cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges);
1128
    CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
1129

1130
    cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255,
1131
              (aperture_size & CV_CANNY_L2_GRADIENT) != 0);
1132
}
1133

1134
/* End of file. */
1135

1136
Product

Resources

Company