Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/imgproc/src/canny.cpp
16354 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// Intel License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Copyright (C) 2014, Itseez Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
22
//
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
26
//
27
// * The name of Intel Corporation may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
#include "precomp.hpp"
44
#include "opencl_kernels_imgproc.hpp"
45
#include "opencv2/core/hal/intrin.hpp"
46
#include <deque>
47
48
#include "opencv2/core/openvx/ovx_defs.hpp"
49
50
#if CV_SIMD128
51
#define CV_MALLOC_SIMD128 16
52
#endif
53
54
namespace cv
55
{
56
57
#ifdef HAVE_IPP
58
static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, float low, float high, bool L2gradient, int aperture_size)
59
{
60
#ifdef HAVE_IPP_IW
61
CV_INSTRUMENT_REGION_IPP();
62
63
#if IPP_DISABLE_PERF_CANNY_MT
64
if(cv::getNumThreads()>1)
65
return false;
66
#endif
67
68
::ipp::IwiSize size(dst.cols, dst.rows);
69
IppDataType type = ippiGetDataType(dst.depth());
70
int channels = dst.channels();
71
IppNormType norm = (L2gradient)?ippNormL2:ippNormL1;
72
73
if(size.width <= 3 || size.height <= 3)
74
return false;
75
76
if(channels != 1)
77
return false;
78
79
if(type != ipp8u)
80
return false;
81
82
if(src.empty())
83
{
84
try
85
{
86
::ipp::IwiImage iwSrcDx;
87
::ipp::IwiImage iwSrcDy;
88
::ipp::IwiImage iwDst;
89
90
ippiGetImage(dx_, iwSrcDx);
91
ippiGetImage(dy_, iwSrcDy);
92
ippiGetImage(dst, iwDst);
93
94
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
95
}
96
catch (const ::ipp::IwException &)
97
{
98
return false;
99
}
100
}
101
else
102
{
103
IppiMaskSize kernel;
104
105
if(aperture_size == 3)
106
kernel = ippMskSize3x3;
107
else if(aperture_size == 5)
108
kernel = ippMskSize5x5;
109
else
110
return false;
111
112
try
113
{
114
::ipp::IwiImage iwSrc;
115
::ipp::IwiImage iwDst;
116
117
ippiGetImage(src, iwSrc);
118
ippiGetImage(dst, iwDst);
119
120
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
121
}
122
catch (const ::ipp::IwException &)
123
{
124
return false;
125
}
126
}
127
128
return true;
129
#else
130
CV_UNUSED(src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(dst); CV_UNUSED(low); CV_UNUSED(high); CV_UNUSED(L2gradient); CV_UNUSED(aperture_size);
131
return false;
132
#endif
133
}
134
#endif
135
136
#ifdef HAVE_OPENCL
137
138
template <bool useCustomDeriv>
139
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
140
int aperture_size, bool L2gradient, int cn, const Size & size)
141
{
142
CV_INSTRUMENT_REGION_OPENCL();
143
144
UMat map;
145
146
const ocl::Device &dev = ocl::Device::getDefault();
147
int max_wg_size = (int)dev.maxWorkGroupSize();
148
149
int lSizeX = 32;
150
int lSizeY = max_wg_size / 32;
151
152
if (lSizeY == 0)
153
{
154
lSizeX = 16;
155
lSizeY = max_wg_size / 16;
156
}
157
if (lSizeY == 0)
158
{
159
lSizeY = 1;
160
}
161
162
if (aperture_size == 7)
163
{
164
low_thresh = low_thresh / 16.0f;
165
high_thresh = high_thresh / 16.0f;
166
}
167
168
if (L2gradient)
169
{
170
low_thresh = std::min(32767.0f, low_thresh);
171
high_thresh = std::min(32767.0f, high_thresh);
172
173
if (low_thresh > 0)
174
low_thresh *= low_thresh;
175
if (high_thresh > 0)
176
high_thresh *= high_thresh;
177
}
178
int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
179
180
if (!useCustomDeriv &&
181
aperture_size == 3 && !_src.isSubmatrix())
182
{
183
/*
184
stage1_with_sobel:
185
Sobel operator
186
Calc magnitudes
187
Non maxima suppression
188
Double thresholding
189
*/
190
char cvt[40];
191
ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
192
format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
193
cn, ocl::memopTypeToStr(_src.depth()),
194
ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
195
ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
196
lSizeX, lSizeY,
197
L2gradient ? " -D L2GRAD" : ""));
198
if (with_sobel.empty())
199
return false;
200
201
UMat src = _src.getUMat();
202
map.create(size, CV_32S);
203
with_sobel.args(ocl::KernelArg::ReadOnly(src),
204
ocl::KernelArg::WriteOnlyNoSize(map),
205
(float) low, (float) high);
206
207
size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
208
localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
209
210
if (!with_sobel.run(2, globalsize, localsize, false))
211
return false;
212
}
213
else
214
{
215
/*
216
stage1_without_sobel:
217
Calc magnitudes
218
Non maxima suppression
219
Double thresholding
220
*/
221
double scale = 1.0;
222
if (aperture_size == 7)
223
{
224
scale = 1 / 16.0;
225
}
226
227
UMat dx, dy;
228
if (!useCustomDeriv)
229
{
230
Sobel(_src, dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
231
Sobel(_src, dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
232
}
233
else
234
{
235
dx = dx_;
236
dy = dy_;
237
}
238
239
ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
240
format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
241
cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
242
if (without_sobel.empty())
243
return false;
244
245
map.create(size, CV_32S);
246
without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
247
ocl::KernelArg::WriteOnly(map),
248
low, high);
249
250
size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
251
localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
252
253
if (!without_sobel.run(2, globalsize, localsize, false))
254
return false;
255
}
256
257
int PIX_PER_WI = 8;
258
/*
259
stage2:
260
hysteresis (add weak edges if they are connected with strong edges)
261
*/
262
263
int sizey = lSizeY / PIX_PER_WI;
264
if (sizey == 0)
265
sizey = 1;
266
267
size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };
268
269
ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
270
format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
271
PIX_PER_WI, lSizeX, sizey));
272
273
if (edgesHysteresis.empty())
274
return false;
275
276
edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
277
if (!edgesHysteresis.run(2, globalsize, localsize, false))
278
return false;
279
280
// get edges
281
282
ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
283
format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
284
if (getEdgesKernel.empty())
285
return false;
286
287
_dst.create(size, CV_8UC1);
288
UMat dst = _dst.getUMat();
289
290
getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));
291
292
return getEdgesKernel.run(2, globalsize, NULL, false);
293
}
294
295
#endif
296
297
// Marks the map cell pointed to by `map` as a definite edge (value 2) and
// queues the pointer on `stack` for hysteresis tracking.
// Expands to a single parenthesized comma expression, so it is safe as the
// body of an unbraced `if`. Note that `map` is evaluated twice.
#define CANNY_PUSH(map, stack) (*(map) = 2, (stack).push_back(map))

// Classifies a pixel that survived non-maxima suppression: above `high` it
// becomes a definite edge (pushed on `stack`), otherwise its map cell is set
// to 0. Wrapped in do/while(0) so that it behaves as one statement.
#define CANNY_CHECK_SIMD(m, high, map, stack) \
    do { \
        if ((m) > (high)) \
            CANNY_PUSH(map, stack); \
        else \
            *(map) = 0; \
    } while (0)

// Same classification as CANNY_CHECK_SIMD, followed by `continue` for the
// enclosing loop. Because of that trailing `continue` it cannot be wrapped in
// do/while(0): use it only as a braced statement inside a loop body.
#define CANNY_CHECK(m, high, map, stack) \
    if ((m) > (high)) \
        CANNY_PUSH(map, stack); \
    else \
        *(map) = 0; \
    continue
311
312
class parallelCanny : public ParallelLoopBody
313
{
314
public:
315
parallelCanny(const Mat &_src, Mat &_map, std::deque<uchar*> &borderPeaksParallel,
316
int _low, int _high, int _aperture_size, bool _L2gradient) :
317
src(_src), src2(_src), map(_map), _borderPeaksParallel(borderPeaksParallel),
318
low(_low), high(_high), aperture_size(_aperture_size), L2gradient(_L2gradient)
319
{
320
#if CV_SIMD128
321
haveSIMD = hasSIMD128();
322
if(haveSIMD)
323
_map.create(src.rows + 2, (int)alignSize((size_t)(src.cols + CV_MALLOC_SIMD128 + 1), CV_MALLOC_SIMD128), CV_8UC1);
324
else
325
#endif
326
_map.create(src.rows + 2, src.cols + 2, CV_8UC1);
327
map = _map;
328
map.row(0).setTo(1);
329
map.row(src.rows + 1).setTo(1);
330
mapstep = map.cols;
331
needGradient = true;
332
cn = src.channels();
333
}
334
335
parallelCanny(const Mat &_dx, const Mat &_dy, Mat &_map, std::deque<uchar*> &borderPeaksParallel,
336
int _low, int _high, bool _L2gradient) :
337
src(_dx), src2(_dy), map(_map), _borderPeaksParallel(borderPeaksParallel),
338
low(_low), high(_high), aperture_size(0), L2gradient(_L2gradient)
339
{
340
#if CV_SIMD128
341
haveSIMD = hasSIMD128();
342
if(haveSIMD)
343
_map.create(src.rows + 2, (int)alignSize((size_t)(src.cols + CV_MALLOC_SIMD128 + 1), CV_MALLOC_SIMD128), CV_8UC1);
344
else
345
#endif
346
_map.create(src.rows + 2, src.cols + 2, CV_8UC1);
347
map = _map;
348
map.row(0).setTo(1);
349
map.row(src.rows + 1).setTo(1);
350
mapstep = map.cols;
351
needGradient = false;
352
cn = src.channels();
353
}
354
355
~parallelCanny() {}
356
357
parallelCanny& operator=(const parallelCanny&) { return *this; }
358
359
void operator()(const Range &boundaries) const CV_OVERRIDE
360
{
361
CV_TRACE_FUNCTION();
362
363
Mat dx, dy;
364
AutoBuffer<short> dxMax(0), dyMax(0);
365
std::deque<uchar*> stack, borderPeaksLocal;
366
const int rowStart = max(0, boundaries.start - 1), rowEnd = min(src.rows, boundaries.end + 1);
367
int *_mag_p, *_mag_a, *_mag_n;
368
short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL;
369
uchar *_pmap;
370
double scale = 1.0;
371
372
CV_TRACE_REGION("gradient")
373
if(needGradient)
374
{
375
if (aperture_size == 7)
376
{
377
scale = 1 / 16.0;
378
}
379
Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE);
380
Sobel(src.rowRange(rowStart, rowEnd), dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE);
381
}
382
else
383
{
384
dx = src.rowRange(rowStart, rowEnd);
385
dy = src2.rowRange(rowStart, rowEnd);
386
}
387
388
CV_TRACE_REGION_NEXT("magnitude");
389
if(cn > 1)
390
{
391
dxMax.allocate(2 * dx.cols);
392
dyMax.allocate(2 * dy.cols);
393
_dx_a = dxMax.data();
394
_dx_n = _dx_a + dx.cols;
395
_dy_a = dyMax.data();
396
_dy_n = _dy_a + dy.cols;
397
}
398
399
// _mag_p: previous row, _mag_a: actual row, _mag_n: next row
400
#if CV_SIMD128
401
AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128));
402
_mag_p = alignPtr(buffer.data() + 1, CV_MALLOC_SIMD128);
403
_mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128);
404
_mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128);
405
#else
406
AutoBuffer<int> buffer(3 * (mapstep * cn));
407
_mag_p = buffer.data() + 1;
408
_mag_a = _mag_p + mapstep * cn;
409
_mag_n = _mag_a + mapstep * cn;
410
#endif
411
412
// For the first time when just 2 rows are filled and for left and right borders
413
if(rowStart == boundaries.start)
414
memset(_mag_n - 1, 0, mapstep * sizeof(int));
415
else
416
_mag_n[src.cols] = _mag_n[-1] = 0;
417
418
_mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0;
419
420
// calculate magnitude and angle of gradient, perform non-maxima suppression.
421
// fill the map with one of the following values:
422
// 0 - the pixel might belong to an edge
423
// 1 - the pixel can not belong to an edge
424
// 2 - the pixel does belong to an edge
425
for (int i = rowStart; i <= boundaries.end; ++i)
426
{
427
// Scroll the ring buffer
428
std::swap(_mag_n, _mag_a);
429
std::swap(_mag_n, _mag_p);
430
431
if(i < rowEnd)
432
{
433
// Next row calculation
434
_dx = dx.ptr<short>(i - rowStart);
435
_dy = dy.ptr<short>(i - rowStart);
436
437
if (L2gradient)
438
{
439
int j = 0, width = src.cols * cn;
440
#if CV_SIMD128
441
if (haveSIMD)
442
{
443
for ( ; j <= width - 8; j += 8)
444
{
445
v_int16x8 v_dx = v_load((const short*)(_dx + j));
446
v_int16x8 v_dy = v_load((const short*)(_dy + j));
447
448
v_int32x4 v_dxp_low, v_dxp_high;
449
v_int32x4 v_dyp_low, v_dyp_high;
450
v_expand(v_dx, v_dxp_low, v_dxp_high);
451
v_expand(v_dy, v_dyp_low, v_dyp_high);
452
453
v_store_aligned((int *)(_mag_n + j), v_dxp_low*v_dxp_low+v_dyp_low*v_dyp_low);
454
v_store_aligned((int *)(_mag_n + j + 4), v_dxp_high*v_dxp_high+v_dyp_high*v_dyp_high);
455
}
456
}
457
#endif
458
for ( ; j < width; ++j)
459
_mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
460
}
461
else
462
{
463
int j = 0, width = src.cols * cn;
464
#if CV_SIMD128
465
if (haveSIMD)
466
{
467
for(; j <= width - 8; j += 8)
468
{
469
v_int16x8 v_dx = v_load((const short *)(_dx + j));
470
v_int16x8 v_dy = v_load((const short *)(_dy + j));
471
472
v_dx = v_reinterpret_as_s16(v_abs(v_dx));
473
v_dy = v_reinterpret_as_s16(v_abs(v_dy));
474
475
v_int32x4 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh;
476
v_expand(v_dx, v_dx_ml, v_dx_mh);
477
v_expand(v_dy, v_dy_ml, v_dy_mh);
478
479
v_store_aligned((int *)(_mag_n + j), v_dx_ml + v_dy_ml);
480
v_store_aligned((int *)(_mag_n + j + 4), v_dx_mh + v_dy_mh);
481
}
482
}
483
#endif
484
for ( ; j < width; ++j)
485
_mag_n[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
486
}
487
488
if(cn > 1)
489
{
490
std::swap(_dx_n, _dx_a);
491
std::swap(_dy_n, _dy_a);
492
493
for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
494
{
495
int maxIdx = jn;
496
for(int k = 1; k < cn; ++k)
497
if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k;
498
499
_mag_n[j] = _mag_n[maxIdx];
500
_dx_n[j] = _dx[maxIdx];
501
_dy_n[j] = _dy[maxIdx];
502
}
503
504
_mag_n[src.cols] = 0;
505
}
506
507
// at the very beginning we do not have a complete ring
508
// buffer of 3 magnitude rows for non-maxima suppression
509
if (i <= boundaries.start)
510
continue;
511
}
512
else
513
{
514
memset(_mag_n - 1, 0, mapstep * sizeof(int));
515
516
if(cn > 1)
517
{
518
std::swap(_dx_n, _dx_a);
519
std::swap(_dy_n, _dy_a);
520
}
521
}
522
523
// From here actual src row is (i - 1)
524
// Set left and right border to 1
525
#if CV_SIMD128
526
if(haveSIMD)
527
_pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128;
528
else
529
#endif
530
_pmap = map.ptr<uchar>(i) + 1;
531
532
_pmap[src.cols] =_pmap[-1] = 1;
533
534
if(cn == 1)
535
{
536
_dx = dx.ptr<short>(i - rowStart - 1);
537
_dy = dy.ptr<short>(i - rowStart - 1);
538
}
539
else
540
{
541
_dx = _dx_a;
542
_dy = _dy_a;
543
}
544
545
const int TG22 = 13573;
546
int j = 0;
547
#if CV_SIMD128
548
if (haveSIMD)
549
{
550
const v_int32x4 v_low = v_setall_s32(low);
551
const v_int8x16 v_one = v_setall_s8(1);
552
553
for (; j <= src.cols - 32; j += 32)
554
{
555
v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
556
v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
557
v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
558
v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));
559
560
v_int32x4 v_cmp1 = v_m1 > v_low;
561
v_int32x4 v_cmp2 = v_m2 > v_low;
562
v_int32x4 v_cmp3 = v_m3 > v_low;
563
v_int32x4 v_cmp4 = v_m4 > v_low;
564
565
v_m1 = v_load_aligned((const int*)(_mag_a + j + 16));
566
v_m2 = v_load_aligned((const int*)(_mag_a + j + 20));
567
v_m3 = v_load_aligned((const int*)(_mag_a + j + 24));
568
v_m4 = v_load_aligned((const int*)(_mag_a + j + 28));
569
570
v_store_aligned((signed char*)(_pmap + j), v_one);
571
v_store_aligned((signed char*)(_pmap + j + 16), v_one);
572
573
v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
574
v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);
575
576
v_cmp1 = v_m1 > v_low;
577
v_cmp2 = v_m2 > v_low;
578
v_cmp3 = v_m3 > v_low;
579
v_cmp4 = v_m4 > v_low;
580
581
v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
582
583
v_cmp80 = v_pack(v_cmp1, v_cmp2);
584
v_cmp81 = v_pack(v_cmp3, v_cmp4);
585
586
unsigned int mask = v_signmask(v_cmp);
587
588
v_cmp = v_pack(v_cmp80, v_cmp81);
589
mask |= v_signmask(v_cmp) << 16;
590
591
if (mask)
592
{
593
int k = j;
594
595
do
596
{
597
int l = trailingZeros32(mask);
598
k += l;
599
mask >>= l;
600
601
int m = _mag_a[k];
602
short xs = _dx[k];
603
short ys = _dy[k];
604
int x = (int)std::abs(xs);
605
int y = (int)std::abs(ys) << 15;
606
607
int tg22x = x * TG22;
608
609
if (y < tg22x)
610
{
611
if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
612
{
613
CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
614
}
615
}
616
else
617
{
618
int tg67x = tg22x + (x << 16);
619
if (y > tg67x)
620
{
621
if (m > _mag_p[k] && m >= _mag_n[k])
622
{
623
CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
624
}
625
}
626
else
627
{
628
int s = (xs ^ ys) < 0 ? -1 : 1;
629
if(m > _mag_p[k - s] && m > _mag_n[k + s])
630
{
631
CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
632
}
633
}
634
}
635
++k;
636
} while((mask >>= 1));
637
}
638
}
639
640
if (j <= src.cols - 16)
641
{
642
v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j));
643
v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4));
644
v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8));
645
v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12));
646
647
v_store_aligned((signed char*)(_pmap + j), v_one);
648
649
v_int32x4 v_cmp1 = v_m1 > v_low;
650
v_int32x4 v_cmp2 = v_m2 > v_low;
651
v_int32x4 v_cmp3 = v_m3 > v_low;
652
v_int32x4 v_cmp4 = v_m4 > v_low;
653
654
v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2);
655
v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4);
656
657
v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81);
658
unsigned int mask = v_signmask(v_cmp);
659
660
if (mask)
661
{
662
int k = j;
663
664
do
665
{
666
int l = trailingZeros32(mask);
667
k += l;
668
mask >>= l;
669
670
int m = _mag_a[k];
671
short xs = _dx[k];
672
short ys = _dy[k];
673
int x = (int)std::abs(xs);
674
int y = (int)std::abs(ys) << 15;
675
676
int tg22x = x * TG22;
677
678
if (y < tg22x)
679
{
680
if (m > _mag_a[k - 1] && m >= _mag_a[k + 1])
681
{
682
CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
683
}
684
}
685
else
686
{
687
int tg67x = tg22x + (x << 16);
688
if (y > tg67x)
689
{
690
if (m > _mag_p[k] && m >= _mag_n[k])
691
{
692
CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
693
}
694
}
695
else
696
{
697
int s = (xs ^ ys) < 0 ? -1 : 1;
698
if(m > _mag_p[k - s] && m > _mag_n[k + s])
699
{
700
CANNY_CHECK_SIMD(m, high, (_pmap+k), stack);
701
}
702
}
703
}
704
++k;
705
} while((mask >>= 1));
706
}
707
j += 16;
708
}
709
}
710
#endif
711
for (; j < src.cols; j++)
712
{
713
int m = _mag_a[j];
714
715
if (m > low)
716
{
717
short xs = _dx[j];
718
short ys = _dy[j];
719
int x = (int)std::abs(xs);
720
int y = (int)std::abs(ys) << 15;
721
722
int tg22x = x * TG22;
723
724
if (y < tg22x)
725
{
726
if (m > _mag_a[j - 1] && m >= _mag_a[j + 1])
727
{
728
CANNY_CHECK(m, high, (_pmap+j), stack);
729
}
730
}
731
else
732
{
733
int tg67x = tg22x + (x << 16);
734
if (y > tg67x)
735
{
736
if (m > _mag_p[j] && m >= _mag_n[j])
737
{
738
CANNY_CHECK(m, high, (_pmap+j), stack);
739
}
740
}
741
else
742
{
743
int s = (xs ^ ys) < 0 ? -1 : 1;
744
if(m > _mag_p[j - s] && m > _mag_n[j + s])
745
{
746
CANNY_CHECK(m, high, (_pmap+j), stack);
747
}
748
}
749
}
750
}
751
_pmap[j] = 1;
752
}
753
}
754
755
// Not for first row of first slice or last row of last slice
756
uchar *pmapLower = (rowStart == 0) ? map.data : (map.data + (boundaries.start + 2) * mapstep);
757
uint pmapDiff = (uint)(((rowEnd == src.rows) ? map.datalimit : (map.data + boundaries.end * mapstep)) - pmapLower);
758
759
// now track the edges (hysteresis thresholding)
760
CV_TRACE_REGION_NEXT("hysteresis");
761
while (!stack.empty())
762
{
763
uchar *m = stack.back();
764
stack.pop_back();
765
766
// Stops thresholding from expanding to other slices by sending pixels in the borders of each
767
// slice in a queue to be serially processed later.
768
if((unsigned)(m - pmapLower) < pmapDiff)
769
{
770
if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
771
if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack);
772
if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
773
if (!m[-1]) CANNY_PUSH((m-1), stack);
774
if (!m[1]) CANNY_PUSH((m+1), stack);
775
if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack);
776
if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack);
777
if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack);
778
}
779
else
780
{
781
borderPeaksLocal.push_back(m);
782
ptrdiff_t mapstep2 = m < pmapLower ? mapstep : -mapstep;
783
784
if (!m[-1]) CANNY_PUSH((m-1), stack);
785
if (!m[1]) CANNY_PUSH((m+1), stack);
786
if (!m[mapstep2-1]) CANNY_PUSH((m+mapstep2-1), stack);
787
if (!m[mapstep2]) CANNY_PUSH((m+mapstep2), stack);
788
if (!m[mapstep2+1]) CANNY_PUSH((m+mapstep2+1), stack);
789
}
790
}
791
792
if(!borderPeaksLocal.empty())
793
{
794
AutoLock lock(mutex);
795
_borderPeaksParallel.insert(_borderPeaksParallel.end(), borderPeaksLocal.begin(), borderPeaksLocal.end());
796
}
797
}
798
799
private:
800
const Mat &src, &src2;
801
Mat &map;
802
std::deque<uchar*> &_borderPeaksParallel;
803
int low, high, aperture_size;
804
bool L2gradient, needGradient;
805
ptrdiff_t mapstep;
806
int cn;
807
#if CV_SIMD128
808
bool haveSIMD;
809
#endif
810
mutable Mutex mutex;
811
};
812
813
class finalPass : public ParallelLoopBody
814
{
815
816
public:
817
finalPass(const Mat &_map, Mat &_dst) :
818
map(_map), dst(_dst)
819
{
820
dst = _dst;
821
#if CV_SIMD128
822
haveSIMD = hasSIMD128();
823
#endif
824
}
825
826
~finalPass() {}
827
828
void operator()(const Range &boundaries) const CV_OVERRIDE
829
{
830
// the final pass, form the final image
831
for (int i = boundaries.start; i < boundaries.end; i++)
832
{
833
int j = 0;
834
uchar *pdst = dst.ptr<uchar>(i);
835
const uchar *pmap = map.ptr<uchar>(i + 1);
836
#if CV_SIMD128
837
if(haveSIMD)
838
pmap += CV_MALLOC_SIMD128;
839
else
840
#endif
841
pmap += 1;
842
#if CV_SIMD128
843
if(haveSIMD) {
844
const v_uint8x16 v_zero = v_setzero_u8();
845
const v_uint8x16 v_ff = ~v_zero;
846
const v_uint8x16 v_two(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
847
848
for (; j <= dst.cols - 16; j += 16)
849
{
850
v_uint8x16 v_pmap = v_load_aligned((const unsigned char*)(pmap + j));
851
v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
852
v_store((pdst + j), v_pmap);
853
}
854
855
if (j <= dst.cols - 8)
856
{
857
v_uint8x16 v_pmap = v_load_low((const unsigned char*)(pmap + j));
858
v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
859
v_store_low((pdst + j), v_pmap);
860
j += 8;
861
}
862
}
863
#endif
864
for (; j < dst.cols; j++)
865
{
866
pdst[j] = (uchar)-(pmap[j] >> 1);
867
}
868
}
869
}
870
871
private:
872
const Mat &map;
873
Mat &dst;
874
#if CV_SIMD128
875
bool haveSIMD;
876
#endif
877
878
finalPass(const finalPass&); // = delete
879
finalPass& operator=(const finalPass&); // = delete
880
};
881
882
#ifdef HAVE_OPENVX
883
namespace ovx {
884
template <> inline bool skipSmallImages<VX_KERNEL_CANNY_EDGE_DETECTOR>(int w, int h) { return w*h < 640 * 480; }
885
}
886
static bool openvx_canny(const Mat& src, Mat& dst, int loVal, int hiVal, int kSize, bool useL2)
887
{
888
using namespace ivx;
889
890
Context context = ovx::getOpenVXContext();
891
try
892
{
893
Image _src = Image::createFromHandle(
894
context,
895
Image::matTypeToFormat(src.type()),
896
Image::createAddressing(src),
897
src.data );
898
Image _dst = Image::createFromHandle(
899
context,
900
Image::matTypeToFormat(dst.type()),
901
Image::createAddressing(dst),
902
dst.data );
903
Threshold threshold = Threshold::createRange(context, VX_TYPE_UINT8, saturate_cast<uchar>(loVal), saturate_cast<uchar>(hiVal));
904
905
#if 0
906
// the code below is disabled because vxuCannyEdgeDetector()
907
// ignores context attribute VX_CONTEXT_IMMEDIATE_BORDER
908
909
// FIXME: may fail in multithread case
910
border_t prevBorder = context.immediateBorder();
911
context.setImmediateBorder(VX_BORDER_REPLICATE);
912
IVX_CHECK_STATUS( vxuCannyEdgeDetector(context, _src, threshold, kSize, (useL2 ? VX_NORM_L2 : VX_NORM_L1), _dst) );
913
context.setImmediateBorder(prevBorder);
914
#else
915
// alternative code without vxuCannyEdgeDetector()
916
Graph graph = Graph::create(context);
917
ivx::Node node = ivx::Node(vxCannyEdgeDetectorNode(graph, _src, threshold, kSize, (useL2 ? VX_NORM_L2 : VX_NORM_L1), _dst) );
918
node.setBorder(VX_BORDER_REPLICATE);
919
graph.verify();
920
graph.process();
921
#endif
922
923
#ifdef VX_VERSION_1_1
924
_src.swapHandle();
925
_dst.swapHandle();
926
#endif
927
}
928
catch(const WrapperError& e)
929
{
930
VX_DbgThrow(e.what());
931
}
932
catch(const RuntimeError& e)
933
{
934
VX_DbgThrow(e.what());
935
}
936
937
return true;
938
}
939
#endif // HAVE_OPENVX
940
941
void Canny( InputArray _src, OutputArray _dst,
942
double low_thresh, double high_thresh,
943
int aperture_size, bool L2gradient )
944
{
945
CV_INSTRUMENT_REGION();
946
947
CV_Assert( _src.depth() == CV_8U );
948
949
const Size size = _src.size();
950
951
// we don't support inplace parameters in case with RGB/BGR src
952
CV_Assert((_dst.getObj() != _src.getObj() || _src.type() == CV_8UC1) && "Inplace parameters are not supported");
953
954
_dst.create(size, CV_8U);
955
956
if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
957
{
958
// backward compatibility
959
aperture_size &= ~CV_CANNY_L2_GRADIENT;
960
L2gradient = true;
961
}
962
963
if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
964
CV_Error(CV_StsBadFlag, "Aperture size should be odd between 3 and 7");
965
966
if (aperture_size == 7)
967
{
968
low_thresh = low_thresh / 16.0;
969
high_thresh = high_thresh / 16.0;
970
}
971
972
if (low_thresh > high_thresh)
973
std::swap(low_thresh, high_thresh);
974
975
CV_OCL_RUN(_dst.isUMat() && (_src.channels() == 1 || _src.channels() == 3),
976
ocl_Canny<false>(_src, UMat(), UMat(), _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, _src.channels(), size))
977
978
Mat src0 = _src.getMat(), dst = _dst.getMat();
979
Mat src(src0.size(), src0.type(), src0.data, src0.step);
980
981
CALL_HAL(canny, cv_hal_canny, src.data, src.step, dst.data, dst.step, src.cols, src.rows, src.channels(),
982
low_thresh, high_thresh, aperture_size, L2gradient);
983
984
CV_OVX_RUN(
985
false && /* disabling due to accuracy issues */
986
src.type() == CV_8UC1 &&
987
!src.isSubmatrix() &&
988
src.cols >= aperture_size &&
989
src.rows >= aperture_size &&
990
!ovx::skipSmallImages<VX_KERNEL_CANNY_EDGE_DETECTOR>(src.cols, src.rows),
991
openvx_canny(
992
src,
993
dst,
994
cvFloor(low_thresh),
995
cvFloor(high_thresh),
996
aperture_size,
997
L2gradient ) )
998
999
CV_IPP_RUN_FAST(ipp_Canny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh, L2gradient, aperture_size))
1000
1001
if (L2gradient)
1002
{
1003
low_thresh = std::min(32767.0, low_thresh);
1004
high_thresh = std::min(32767.0, high_thresh);
1005
1006
if (low_thresh > 0) low_thresh *= low_thresh;
1007
if (high_thresh > 0) high_thresh *= high_thresh;
1008
}
1009
int low = cvFloor(low_thresh);
1010
int high = cvFloor(high_thresh);
1011
1012
// If Scharr filter: aperture size is 3, ksize2 is 1
1013
int ksize2 = aperture_size < 0 ? 1 : aperture_size / 2;
1014
// Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead
1015
int numOfThreads = std::max(1, std::min(getNumThreads(), getNumberOfCPUs()));
1016
// Make a fallback for pictures with too few rows.
1017
int grainSize = src.rows / numOfThreads;
1018
int minGrainSize = 2 * (ksize2 + 1);
1019
if (grainSize < minGrainSize)
1020
numOfThreads = std::max(1, src.rows / minGrainSize);
1021
1022
Mat map;
1023
std::deque<uchar*> stack;
1024
1025
parallel_for_(Range(0, src.rows), parallelCanny(src, map, stack, low, high, aperture_size, L2gradient), numOfThreads);
1026
1027
CV_TRACE_REGION("global_hysteresis");
1028
// now track the edges (hysteresis thresholding)
1029
ptrdiff_t mapstep = map.cols;
1030
1031
while (!stack.empty())
1032
{
1033
uchar* m = stack.back();
1034
stack.pop_back();
1035
1036
if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
1037
if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack);
1038
if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
1039
if (!m[-1]) CANNY_PUSH((m-1), stack);
1040
if (!m[1]) CANNY_PUSH((m+1), stack);
1041
if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack);
1042
if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack);
1043
if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack);
1044
}
1045
1046
CV_TRACE_REGION_NEXT("finalPass");
1047
parallel_for_(Range(0, src.rows), finalPass(map, dst), src.total()/(double)(1<<16));
1048
}
1049
1050
void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
1051
double low_thresh, double high_thresh,
1052
bool L2gradient )
1053
{
1054
CV_INSTRUMENT_REGION();
1055
1056
CV_Assert(_dx.dims() == 2);
1057
CV_Assert(_dx.type() == CV_16SC1 || _dx.type() == CV_16SC3);
1058
CV_Assert(_dy.type() == _dx.type());
1059
CV_Assert(_dx.sameSize(_dy));
1060
1061
if (low_thresh > high_thresh)
1062
std::swap(low_thresh, high_thresh);
1063
1064
const Size size = _dx.size();
1065
1066
CV_OCL_RUN(_dst.isUMat(),
1067
ocl_Canny<true>(UMat(), _dx.getUMat(), _dy.getUMat(), _dst, (float)low_thresh, (float)high_thresh, 0, L2gradient, _dx.channels(), size))
1068
1069
_dst.create(size, CV_8U);
1070
Mat dst = _dst.getMat();
1071
1072
Mat dx = _dx.getMat();
1073
Mat dy = _dy.getMat();
1074
1075
CV_IPP_RUN_FAST(ipp_Canny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh, L2gradient, 0))
1076
1077
if (L2gradient)
1078
{
1079
low_thresh = std::min(32767.0, low_thresh);
1080
high_thresh = std::min(32767.0, high_thresh);
1081
1082
if (low_thresh > 0) low_thresh *= low_thresh;
1083
if (high_thresh > 0) high_thresh *= high_thresh;
1084
}
1085
1086
int low = cvFloor(low_thresh);
1087
int high = cvFloor(high_thresh);
1088
1089
std::deque<uchar*> stack;
1090
Mat map;
1091
1092
// Minimum number of threads should be 1, maximum should not exceed number of CPU's, because of overhead
1093
int numOfThreads = std::max(1, std::min(getNumThreads(), getNumberOfCPUs()));
1094
if (dx.rows / numOfThreads < 3)
1095
numOfThreads = std::max(1, dx.rows / 3);
1096
1097
parallel_for_(Range(0, dx.rows), parallelCanny(dx, dy, map, stack, low, high, L2gradient), numOfThreads);
1098
1099
CV_TRACE_REGION("global_hysteresis")
1100
// now track the edges (hysteresis thresholding)
1101
ptrdiff_t mapstep = map.cols;
1102
1103
while (!stack.empty())
1104
{
1105
uchar* m = stack.back();
1106
stack.pop_back();
1107
1108
if (!m[-mapstep-1]) CANNY_PUSH((m-mapstep-1), stack);
1109
if (!m[-mapstep]) CANNY_PUSH((m-mapstep), stack);
1110
if (!m[-mapstep+1]) CANNY_PUSH((m-mapstep+1), stack);
1111
if (!m[-1]) CANNY_PUSH((m-1), stack);
1112
if (!m[1]) CANNY_PUSH((m+1), stack);
1113
if (!m[mapstep-1]) CANNY_PUSH((m+mapstep-1), stack);
1114
if (!m[mapstep]) CANNY_PUSH((m+mapstep), stack);
1115
if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack);
1116
}
1117
1118
CV_TRACE_REGION_NEXT("finalPass");
1119
parallel_for_(Range(0, dx.rows), finalPass(map, dst), dx.total()/(double)(1<<16));
1120
}
1121
1122
} // namespace cv
1123
1124
void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
1125
double threshold2, int aperture_size )
1126
{
1127
cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges);
1128
CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
1129
1130
cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255,
1131
(aperture_size & CV_CANNY_L2_GRADIENT) != 0);
1132
}
1133
1134
/* End of file. */
1135
1136