Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/canny.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
42
#include "saturate_cast.hpp"
43
#include <vector>
44
#include <cstring>
45
46
namespace CAROTENE_NS {
47
48
#ifdef CAROTENE_NEON
49
namespace {
50
struct RowFilter3x3Canny
51
{
52
inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr)
53
{
54
vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL));
55
vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL));
56
lookLeft = offsetk - borderxl;
57
lookRight = offsetk - borderxr;
58
}
59
60
inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width)
61
{
62
uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask);
63
ptrdiff_t i = 0;
64
for (; i < width - 8 + lookRight; i += 8)
65
{
66
internal::prefetch(src + i);
67
uint8x8_t l18u = vld1_u8(src + i + 1);
68
69
uint8x8_t l2 = l18u;
70
uint8x8_t l0 = vext_u8(l, l18u, 6);
71
int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1));
72
73
l = l18u;
74
75
int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0));
76
int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0));
77
int16x8_t ldy = vaddq_s16(l02, l1x2);
78
79
vst1q_s16(dstx + i, ldx);
80
vst1q_s16(dsty + i, ldy);
81
}
82
83
//tail
84
if (lookRight == 0 || i != width)
85
{
86
uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1
87
uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask);
88
uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7);
89
90
int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0));
91
int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1));
92
int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0));
93
int16x8_t taildy = vqaddq_s16(tail02, tail1x2);
94
95
vst1q_s16(dstx + (width - 8), taildx);
96
vst1q_s16(dsty + (width - 8), taildy);
97
}
98
}
99
100
uint8x8_t vfmask;
101
uint8x8_t vtmask;
102
enum { offsetk = 1};
103
ptrdiff_t lookLeft;
104
ptrdiff_t lookRight;
105
};
106
107
template <bool L2gradient>
108
inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
109
{
110
ptrdiff_t j = 0;
111
for (; j <= width - 8; j += 8)
112
{
113
ColFilter3x3CannyL1Loop:
114
int16x8_t line0x = vld1q_s16(src0 + j);
115
int16x8_t line1x = vld1q_s16(src1 + j);
116
int16x8_t line2x = vld1q_s16(src2 + j);
117
int16x8_t line0y = vld1q_s16(src0 + j + width);
118
int16x8_t line2y = vld1q_s16(src2 + j + width);
119
120
int16x8_t l02 = vaddq_s16(line0x, line2x);
121
int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
122
int16x8_t dy = vsubq_s16(line2y, line0y);
123
int16x8_t dx = vaddq_s16(l1x2, l02);
124
125
int16x8_t dya = vabsq_s16(dy);
126
int16x8_t dxa = vabsq_s16(dx);
127
int16x8_t norm = vaddq_s16(dya, dxa);
128
129
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
130
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
131
132
vst1q_s16(dsty + j, dy);
133
vst1q_s16(dstx + j, dx);
134
vst1q_s32(mag + j + 4, normh);
135
vst1q_s32(mag + j, norml);
136
}
137
if (j != width)
138
{
139
j = width - 8;
140
goto ColFilter3x3CannyL1Loop;
141
}
142
}
143
template <>
144
inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
145
{
146
ptrdiff_t j = 0;
147
for (; j <= width - 8; j += 8)
148
{
149
ColFilter3x3CannyL2Loop:
150
int16x8_t line0x = vld1q_s16(src0 + j);
151
int16x8_t line1x = vld1q_s16(src1 + j);
152
int16x8_t line2x = vld1q_s16(src2 + j);
153
int16x8_t line0y = vld1q_s16(src0 + j + width);
154
int16x8_t line2y = vld1q_s16(src2 + j + width);
155
156
int16x8_t l02 = vaddq_s16(line0x, line2x);
157
int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
158
int16x8_t dy = vsubq_s16(line2y, line0y);
159
int16x8_t dx = vaddq_s16(l1x2, l02);
160
161
int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx));
162
int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy));
163
164
norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy));
165
normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx));
166
167
vst1q_s16(dsty + j, dy);
168
vst1q_s16(dstx + j, dx);
169
vst1q_s32(mag + j, norml);
170
vst1q_s32(mag + j + 4, normh);
171
}
172
if (j != width)
173
{
174
j = width - 8;
175
goto ColFilter3x3CannyL2Loop;
176
}
177
}
178
179
template <bool L2gradient>
180
inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
181
{
182
ptrdiff_t j = 0;
183
if (colscn >= 8)
184
{
185
int16x8_t vx = vld1q_s16(_dx);
186
int16x8_t vy = vld1q_s16(_dy);
187
for (; j <= colscn - 16; j+=8)
188
{
189
internal::prefetch(_dx);
190
internal::prefetch(_dy);
191
192
int16x8_t vx2 = vld1q_s16(_dx + j + 8);
193
int16x8_t vy2 = vld1q_s16(_dy + j + 8);
194
195
int16x8_t vabsx = vabsq_s16(vx);
196
int16x8_t vabsy = vabsq_s16(vy);
197
198
int16x8_t norm = vaddq_s16(vabsx, vabsy);
199
200
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
201
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
202
203
vst1q_s32(_norm + j + 4, normh);
204
vst1q_s32(_norm + j + 0, norml);
205
206
vx = vx2;
207
vy = vy2;
208
}
209
int16x8_t vabsx = vabsq_s16(vx);
210
int16x8_t vabsy = vabsq_s16(vy);
211
212
int16x8_t norm = vaddq_s16(vabsx, vabsy);
213
214
int32x4_t normh = vmovl_s16(vget_high_s16(norm));
215
int32x4_t norml = vmovl_s16(vget_low_s16(norm));
216
217
vst1q_s32(_norm + j + 4, normh);
218
vst1q_s32(_norm + j + 0, norml);
219
}
220
for (; j < colscn; j++)
221
_norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j]));
222
}
223
224
template <>
225
inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
226
{
227
ptrdiff_t j = 0;
228
if (colscn >= 8)
229
{
230
int16x8_t vx = vld1q_s16(_dx);
231
int16x8_t vy = vld1q_s16(_dy);
232
233
for (; j <= colscn - 16; j+=8)
234
{
235
internal::prefetch(_dx);
236
internal::prefetch(_dy);
237
238
int16x8_t vxnext = vld1q_s16(_dx + j + 8);
239
int16x8_t vynext = vld1q_s16(_dy + j + 8);
240
241
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
242
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
243
244
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
245
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
246
247
vst1q_s32(_norm + j + 0, norml);
248
vst1q_s32(_norm + j + 4, normh);
249
250
vx = vxnext;
251
vy = vynext;
252
}
253
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
254
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
255
256
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
257
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
258
259
vst1q_s32(_norm + j + 0, norml);
260
vst1q_s32(_norm + j + 4, normh);
261
}
262
for (; j < colscn; j++)
263
_norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j];
264
}
265
266
template <bool L2gradient>
267
inline void prepareThresh(f64 low_thresh, f64 high_thresh,
268
s32 &low, s32 &high)
269
{
270
if (low_thresh > high_thresh)
271
std::swap(low_thresh, high_thresh);
272
#if defined __GNUC__
273
low = (s32)low_thresh;
274
high = (s32)high_thresh;
275
low -= (low > low_thresh);
276
high -= (high > high_thresh);
277
#else
278
low = internal::round(low_thresh);
279
high = internal::round(high_thresh);
280
f32 ldiff = (f32)(low_thresh - low);
281
f32 hdiff = (f32)(high_thresh - high);
282
low -= (ldiff < 0);
283
high -= (hdiff < 0);
284
#endif
285
}
286
template <>
287
inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh,
288
s32 &low, s32 &high)
289
{
290
if (low_thresh > high_thresh)
291
std::swap(low_thresh, high_thresh);
292
if (low_thresh > 0) low_thresh *= low_thresh;
293
if (high_thresh > 0) high_thresh *= high_thresh;
294
#if defined __GNUC__
295
low = (s32)low_thresh;
296
high = (s32)high_thresh;
297
low -= (low > low_thresh);
298
high -= (high > high_thresh);
299
#else
300
low = internal::round(low_thresh);
301
high = internal::round(high_thresh);
302
f32 ldiff = (f32)(low_thresh - low);
303
f32 hdiff = (f32)(high_thresh - high);
304
low -= (ldiff < 0);
305
high -= (hdiff < 0);
306
#endif
307
}
308
309
template <bool L2gradient, bool externalSobel>
310
struct _normEstimator
311
{
312
ptrdiff_t magstep;
313
ptrdiff_t dxOffset;
314
ptrdiff_t dyOffset;
315
ptrdiff_t shxOffset;
316
ptrdiff_t shyOffset;
317
std::vector<u8> buffer;
318
const ptrdiff_t offsetk;
319
ptrdiff_t borderyt, borderyb;
320
RowFilter3x3Canny sobelRow;
321
322
inline _normEstimator(const Size2D &size, s32, Margin borderMargin,
323
ptrdiff_t &mapstep, s32** mag_buf, u8* &map):
324
offsetk(1),
325
sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left),
326
std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right))
327
{
328
mapstep = size.width + 2;
329
magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32));
330
dxOffset = mapstep * sizeof(s32)/sizeof(s16);
331
dyOffset = dxOffset + size.width * 1;
332
shxOffset = dxOffset + size.width * 2;
333
shyOffset = dxOffset + size.width * 3;
334
buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) );
335
mag_buf[0] = (s32*)&buffer[0];
336
mag_buf[1] = mag_buf[0] + magstep;
337
mag_buf[2] = mag_buf[1] + magstep;
338
memset(mag_buf[0], 0, mapstep * sizeof(s32));
339
340
map = (u8*)(mag_buf[2] + magstep);
341
memset(map, 1, mapstep);
342
memset(map + mapstep*(size.height + 1), 1, mapstep);
343
borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top);
344
borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom);
345
}
346
inline void firstRow(const Size2D &size, s32,
347
const u8 *srcBase, ptrdiff_t srcStride,
348
s16*, ptrdiff_t,
349
s16*, ptrdiff_t,
350
s32** mag_buf)
351
{
352
//sobelH row #0
353
const u8* _src = internal::getRowPtr(srcBase, srcStride, 0);
354
sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width);
355
//sobelH row #1
356
_src = internal::getRowPtr(srcBase, srcStride, 1);
357
sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width);
358
359
mag_buf[1][0] = mag_buf[1][size.width+1] = 0;
360
if (borderyt == 0)
361
{
362
//sobelH row #-1
363
_src = internal::getRowPtr(srcBase, srcStride, -1);
364
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
365
366
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
367
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
368
}
369
else
370
{
371
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
372
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
373
}
374
}
375
inline void nextRow(const Size2D &size, s32,
376
const u8 *srcBase, ptrdiff_t srcStride,
377
s16*, ptrdiff_t,
378
s16*, ptrdiff_t,
379
const ptrdiff_t &mapstep, s32** mag_buf,
380
size_t i, const s16* &_x, const s16* &_y)
381
{
382
mag_buf[2][0] = mag_buf[2][size.width+1] = 0;
383
if (i < size.height - borderyb)
384
{
385
const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1);
386
//sobelH row #i+1
387
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
388
389
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset,
390
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
391
}
392
else if (i < size.height)
393
{
394
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
395
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
396
}
397
else
398
memset(mag_buf[2], 0, mapstep*sizeof(s32));
399
_x = ((s16*)mag_buf[1]) + dxOffset;
400
_y = ((s16*)mag_buf[1]) + dyOffset;
401
}
402
};
403
template <bool L2gradient>
404
struct _normEstimator<L2gradient, true>
405
{
406
std::vector<u8> buffer;
407
408
inline _normEstimator(const Size2D &size, s32 cn, Margin,
409
ptrdiff_t &mapstep, s32** mag_buf, u8* &map)
410
{
411
mapstep = size.width + 2;
412
buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) );
413
mag_buf[0] = (s32*)&buffer[0];
414
mag_buf[1] = mag_buf[0] + mapstep*cn;
415
mag_buf[2] = mag_buf[1] + mapstep*cn;
416
memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32));
417
418
map = (u8*)(mag_buf[2] + mapstep*cn);
419
memset(map, 1, mapstep);
420
memset(map + mapstep*(size.height + 1), 1, mapstep);
421
}
422
inline void firstRow(const Size2D &size, s32 cn,
423
const u8 *, ptrdiff_t,
424
s16* dxBase, ptrdiff_t dxStride,
425
s16* dyBase, ptrdiff_t dyStride,
426
s32** mag_buf)
427
{
428
s32* _norm = mag_buf[1] + 1;
429
430
s16* _dx = internal::getRowPtr(dxBase, dxStride, 0);
431
s16* _dy = internal::getRowPtr(dyBase, dyStride, 0);
432
433
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
434
435
if(cn > 1)
436
{
437
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
438
{
439
size_t maxIdx = jn;
440
for(s32 k = 1; k < cn; ++k)
441
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
442
_norm[j] = _norm[maxIdx];
443
_dx[j] = _dx[maxIdx];
444
_dy[j] = _dy[maxIdx];
445
}
446
}
447
448
_norm[-1] = _norm[size.width] = 0;
449
}
450
inline void nextRow(const Size2D &size, s32 cn,
451
const u8 *, ptrdiff_t,
452
s16* dxBase, ptrdiff_t dxStride,
453
s16* dyBase, ptrdiff_t dyStride,
454
const ptrdiff_t &mapstep, s32** mag_buf,
455
size_t i, const s16* &_x, const s16* &_y)
456
{
457
s32* _norm = mag_buf[(i > 0) + 1] + 1;
458
if (i < size.height)
459
{
460
s16* _dx = internal::getRowPtr(dxBase, dxStride, i);
461
s16* _dy = internal::getRowPtr(dyBase, dyStride, i);
462
463
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
464
465
if(cn > 1)
466
{
467
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
468
{
469
size_t maxIdx = jn;
470
for(s32 k = 1; k < cn; ++k)
471
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
472
_norm[j] = _norm[maxIdx];
473
_dx[j] = _dx[maxIdx];
474
_dy[j] = _dy[maxIdx];
475
}
476
}
477
478
_norm[-1] = _norm[size.width] = 0;
479
}
480
else
481
memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32));
482
483
_x = internal::getRowPtr(dxBase, dxStride, i-1);
484
_y = internal::getRowPtr(dyBase, dyStride, i-1);
485
}
486
};
487
488
template <bool L2gradient, bool externalSobel>
489
inline void Canny3x3(const Size2D &size, s32 cn,
490
const u8 * srcBase, ptrdiff_t srcStride,
491
u8 * dstBase, ptrdiff_t dstStride,
492
s16 * dxBase, ptrdiff_t dxStride,
493
s16 * dyBase, ptrdiff_t dyStride,
494
f64 low_thresh, f64 high_thresh,
495
Margin borderMargin)
496
{
497
s32 low, high;
498
prepareThresh<L2gradient>(low_thresh, high_thresh, low, high);
499
500
ptrdiff_t mapstep;
501
s32* mag_buf[3];
502
u8* map;
503
_normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map);
504
505
size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 );
506
std::vector<u8*> stack( maxsize );
507
u8 **stack_top = &stack[0];
508
u8 **stack_bottom = &stack[0];
509
510
/* sector numbers
511
(Top-Left Origin)
512
513
1 2 3
514
* * *
515
* * *
516
0*******0
517
* * *
518
* * *
519
3 2 1
520
*/
521
522
#define CANNY_PUSH(d) *(d) = u8(2), *stack_top++ = (d)
523
#define CANNY_POP(d) (d) = *--stack_top
524
525
//i == 0
526
normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf);
527
// calculate magnitude and angle of gradient, perform non-maxima supression.
528
// fill the map with one of the following values:
529
// 0 - the pixel might belong to an edge
530
// 1 - the pixel can not belong to an edge
531
// 2 - the pixel does belong to an edge
532
for (size_t i = 1; i <= size.height; i++)
533
{
534
const s16 *_x, *_y;
535
normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y);
536
537
u8* _map = map + mapstep*i + 1;
538
_map[-1] = _map[size.width] = 1;
539
540
s32* _mag = mag_buf[1] + 1; // take the central row
541
ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
542
ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
543
544
if ((stack_top - stack_bottom) + size.width > maxsize)
545
{
546
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
547
maxsize = maxsize * 3/2;
548
stack.resize(maxsize);
549
stack_bottom = &stack[0];
550
stack_top = stack_bottom + sz;
551
}
552
553
s32 prev_flag = 0;
554
for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++)
555
{
556
#define CANNY_SHIFT 15
557
const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
558
559
s32 m = _mag[j];
560
561
if (m > low)
562
{
563
s32 xs = _x[j];
564
s32 ys = _y[j];
565
s32 x = abs(xs);
566
s32 y = abs(ys) << CANNY_SHIFT;
567
568
s32 tg22x = x * TG22;
569
570
if (y < tg22x)
571
{
572
if (m > _mag[j-1] && m >= _mag[j+1]) goto __push;
573
}
574
else
575
{
576
s32 tg67x = tg22x + (x << (CANNY_SHIFT+1));
577
if (y > tg67x)
578
{
579
if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push;
580
}
581
else
582
{
583
s32 s = (xs ^ ys) < 0 ? -1 : 1;
584
if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push;
585
}
586
}
587
}
588
prev_flag = 0;
589
_map[j] = u8(1);
590
continue;
591
__push:
592
if (!prev_flag && m > high && _map[j-mapstep] != 2)
593
{
594
CANNY_PUSH(_map + j);
595
prev_flag = 1;
596
}
597
else
598
_map[j] = 0;
599
}
600
601
// scroll the ring buffer
602
_mag = mag_buf[0];
603
mag_buf[0] = mag_buf[1];
604
mag_buf[1] = mag_buf[2];
605
mag_buf[2] = _mag;
606
}
607
608
// now track the edges (hysteresis thresholding)
609
while (stack_top > stack_bottom)
610
{
611
u8* m;
612
if ((size_t)(stack_top - stack_bottom) + 8u > maxsize)
613
{
614
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
615
maxsize = maxsize * 3/2;
616
stack.resize(maxsize);
617
stack_bottom = &stack[0];
618
stack_top = stack_bottom + sz;
619
}
620
621
CANNY_POP(m);
622
623
if (!m[-1]) CANNY_PUSH(m - 1);
624
if (!m[1]) CANNY_PUSH(m + 1);
625
if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
626
if (!m[-mapstep]) CANNY_PUSH(m - mapstep);
627
if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
628
if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1);
629
if (!m[mapstep]) CANNY_PUSH(m + mapstep);
630
if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1);
631
}
632
633
// the final pass, form the final image
634
uint8x16_t v2 = vmovq_n_u8(2);
635
const u8* ptrmap = map + mapstep + 1;
636
for (size_t i = 0; i < size.height; i++, ptrmap += mapstep)
637
{
638
u8* _dst = internal::getRowPtr(dstBase, dstStride, i);
639
ptrdiff_t j = 0;
640
for (; j < (ptrdiff_t)size.width - 16; j += 16)
641
{
642
internal::prefetch(ptrmap);
643
uint8x16_t vmap = vld1q_u8(ptrmap + j);
644
uint8x16_t vdst = vceqq_u8(vmap, v2);
645
vst1q_u8(_dst+j, vdst);
646
}
647
for (; j < (ptrdiff_t)size.width; j++)
648
_dst[j] = (u8)-(ptrmap[j] >> 1);
649
}
650
}
651
652
} // namespace
653
#endif
654
655
bool isCanny3x3Supported(const Size2D &size)
656
{
657
return isSupportedConfiguration() &&
658
size.height >= 2 && size.width >= 9;
659
}
660
661
void Canny3x3L1(const Size2D &size,
662
const u8 * srcBase, ptrdiff_t srcStride,
663
u8 * dstBase, ptrdiff_t dstStride,
664
f64 low_thresh, f64 high_thresh,
665
Margin borderMargin)
666
{
667
internal::assertSupportedConfiguration(isCanny3x3Supported(size));
668
#ifdef CAROTENE_NEON
669
Canny3x3<false, false>(size, 1,
670
srcBase, srcStride,
671
dstBase, dstStride,
672
NULL, 0,
673
NULL, 0,
674
low_thresh, high_thresh,
675
borderMargin);
676
#else
677
(void)size;
678
(void)srcBase;
679
(void)srcStride;
680
(void)dstBase;
681
(void)dstStride;
682
(void)low_thresh;
683
(void)high_thresh;
684
(void)borderMargin;
685
#endif
686
}
687
688
void Canny3x3L2(const Size2D &size,
689
const u8 * srcBase, ptrdiff_t srcStride,
690
u8 * dstBase, ptrdiff_t dstStride,
691
f64 low_thresh, f64 high_thresh,
692
Margin borderMargin)
693
{
694
internal::assertSupportedConfiguration(isCanny3x3Supported(size));
695
#ifdef CAROTENE_NEON
696
Canny3x3<true, false>(size, 1,
697
srcBase, srcStride,
698
dstBase, dstStride,
699
NULL, 0,
700
NULL, 0,
701
low_thresh, high_thresh,
702
borderMargin);
703
#else
704
(void)size;
705
(void)srcBase;
706
(void)srcStride;
707
(void)dstBase;
708
(void)dstStride;
709
(void)low_thresh;
710
(void)high_thresh;
711
(void)borderMargin;
712
#endif
713
}
714
715
void Canny3x3L1(const Size2D &size, s32 cn,
716
s16 * dxBase, ptrdiff_t dxStride,
717
s16 * dyBase, ptrdiff_t dyStride,
718
u8 * dstBase, ptrdiff_t dstStride,
719
f64 low_thresh, f64 high_thresh)
720
{
721
internal::assertSupportedConfiguration();
722
#ifdef CAROTENE_NEON
723
Canny3x3<false, true>(size, cn,
724
NULL, 0,
725
dstBase, dstStride,
726
dxBase, dxStride,
727
dyBase, dyStride,
728
low_thresh, high_thresh,
729
Margin());
730
#else
731
(void)size;
732
(void)cn;
733
(void)dstBase;
734
(void)dstStride;
735
(void)dxBase;
736
(void)dxStride;
737
(void)dyBase;
738
(void)dyStride;
739
(void)low_thresh;
740
(void)high_thresh;
741
#endif
742
}
743
744
void Canny3x3L2(const Size2D &size, s32 cn,
745
s16 * dxBase, ptrdiff_t dxStride,
746
s16 * dyBase, ptrdiff_t dyStride,
747
u8 * dstBase, ptrdiff_t dstStride,
748
f64 low_thresh, f64 high_thresh)
749
{
750
internal::assertSupportedConfiguration();
751
#ifdef CAROTENE_NEON
752
Canny3x3<true, true>(size, cn,
753
NULL, 0,
754
dstBase, dstStride,
755
dxBase, dxStride,
756
dyBase, dyStride,
757
low_thresh, high_thresh,
758
Margin());
759
#else
760
(void)size;
761
(void)cn;
762
(void)dstBase;
763
(void)dstStride;
764
(void)dxBase;
765
(void)dxStride;
766
(void)dyBase;
767
(void)dyStride;
768
(void)low_thresh;
769
(void)high_thresh;
770
#endif
771
}
772
773
} // namespace CAROTENE_NS
774
775