Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/fast.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
41
/* This is FAST corner detector, contributed to OpenCV by the author, Edward Rosten.
42
Below is the original copyright and the references */
43
44
/*
45
Copyright (c) 2006, 2008 Edward Rosten
46
All rights reserved.
47
48
Redistribution and use in source and binary forms, with or without
49
modification, are permitted provided that the following conditions
50
are met:
51
52
*Redistributions of source code must retain the above copyright
53
notice, this list of conditions and the following disclaimer.
54
55
*Redistributions in binary form must reproduce the above copyright
56
notice, this list of conditions and the following disclaimer in the
57
documentation and/or other materials provided with the distribution.
58
59
*Neither the name of the University of Cambridge nor the names of
60
its contributors may be used to endorse or promote products derived
61
from this software without specific prior written permission.
62
63
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
64
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
65
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
66
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
67
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
68
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
69
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
70
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
71
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
72
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
73
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
74
*/
75
76
/*
77
The references are:
78
* Machine learning for high-speed corner detection,
79
E. Rosten and T. Drummond, ECCV 2006
80
* Faster and better: A machine learning approach to corner detection
81
E. Rosten, R. Porter and T. Drummond, PAMI, 2009
82
*/
83
84
#include "common.hpp"
85
86
#include <vector>
87
#include <cstring>
88
89
namespace CAROTENE_NS {
90
91
#ifdef CAROTENE_NEON
92
namespace
93
{
94
95
void makeOffsets(ptrdiff_t pixel[], ptrdiff_t row_stride)
96
{
97
pixel[0] = 0 + row_stride * 3;
98
pixel[1] = 1 + row_stride * 3;
99
pixel[2] = 2 + row_stride * 2;
100
pixel[3] = 3 + row_stride * 1;
101
pixel[4] = 3 + row_stride * 0;
102
pixel[5] = 3 + row_stride * -1;
103
pixel[6] = 2 + row_stride * -2;
104
pixel[7] = 1 + row_stride * -3;
105
pixel[8] = 0 + row_stride * -3;
106
pixel[9] = -1 + row_stride * -3;
107
pixel[10] = -2 + row_stride * -2;
108
pixel[11] = -3 + row_stride * -1;
109
pixel[12] = -3 + row_stride * 0;
110
pixel[13] = -3 + row_stride * 1;
111
pixel[14] = -2 + row_stride * 2;
112
pixel[15] = -1 + row_stride * 3;
113
}
114
115
u8 cornerScore(const u8* ptr, const ptrdiff_t pixel[])
116
{
117
const s32 K = 8, N = 16 + K + 1;
118
s32 k, v = ptr[0];
119
s16 d[(N + 7) & ~7];
120
for( k = 0; k < N; k++ )
121
d[k] = (s16)(v - ptr[pixel[k]]);
122
123
int16x8_t q0 = vdupq_n_s16((s16)(-1000));
124
int16x8_t q1 = vdupq_n_s16((s16)(1000));
125
126
int16x8_t d0_7 = vld1q_s16(d + 0);
127
int16x8_t d8_15 = vld1q_s16(d + 8);
128
int16x8_t d16_23 = vld1q_s16(d + 16);
129
int16x8_t d24 = vld1q_s16(d + 24);
130
131
//k == 0
132
int16x8_t v0k0 = vextq_s16(d0_7, d8_15, 1);
133
int16x8_t v1k0 = vextq_s16(d0_7, d8_15, 2);
134
int16x8_t ak0 = vminq_s16(v0k0, v1k0);
135
int16x8_t bk0 = vmaxq_s16(v0k0, v1k0);
136
137
v0k0 = vextq_s16(d0_7, d8_15, 3);
138
ak0 = vminq_s16(ak0, v0k0);
139
bk0 = vmaxq_s16(bk0, v0k0);
140
141
v1k0 = vextq_s16(d0_7, d8_15, 4);
142
ak0 = vminq_s16(ak0, v1k0);
143
bk0 = vmaxq_s16(bk0, v1k0);
144
145
v0k0 = vextq_s16(d0_7, d8_15, 5);
146
ak0 = vminq_s16(ak0, v0k0);
147
bk0 = vmaxq_s16(bk0, v0k0);
148
149
v1k0 = vextq_s16(d0_7, d8_15, 6);
150
ak0 = vminq_s16(ak0, v1k0);
151
bk0 = vmaxq_s16(bk0, v1k0);
152
153
v0k0 = vextq_s16(d0_7, d8_15, 7);
154
ak0 = vminq_s16(ak0, v0k0);
155
bk0 = vmaxq_s16(bk0, v0k0);
156
157
ak0 = vminq_s16(ak0, d8_15);
158
bk0 = vmaxq_s16(bk0, d8_15);
159
160
q0 = vmaxq_s16(q0, vminq_s16(ak0, d0_7));
161
q1 = vminq_s16(q1, vmaxq_s16(bk0, d0_7));
162
163
v1k0 = vextq_s16(d8_15, d16_23, 1);
164
q0 = vmaxq_s16(q0, vminq_s16(ak0, v1k0));
165
q1 = vminq_s16(q1, vmaxq_s16(bk0, v1k0));
166
167
//k == 8
168
int16x8_t v0k8 = v1k0;
169
int16x8_t v1k8 = vextq_s16(d8_15, d16_23, 2);
170
int16x8_t ak8 = vminq_s16(v0k8, v1k8);
171
int16x8_t bk8 = vmaxq_s16(v0k8, v1k8);
172
173
v0k8 = vextq_s16(d8_15, d16_23, 3);
174
ak8 = vminq_s16(ak8, v0k8);
175
bk8 = vmaxq_s16(bk8, v0k8);
176
177
v1k8 = vextq_s16(d8_15, d16_23, 4);
178
ak8 = vminq_s16(ak8, v1k8);
179
bk8 = vmaxq_s16(bk8, v1k8);
180
181
v0k8 = vextq_s16(d8_15, d16_23, 5);
182
ak8 = vminq_s16(ak8, v0k8);
183
bk8 = vmaxq_s16(bk8, v0k8);
184
185
v1k8 = vextq_s16(d8_15, d16_23, 6);
186
ak8 = vminq_s16(ak8, v1k8);
187
bk8 = vmaxq_s16(bk8, v1k8);
188
189
v0k8 = vextq_s16(d8_15, d16_23, 7);
190
ak8 = vminq_s16(ak8, v0k8);
191
bk8 = vmaxq_s16(bk8, v0k8);
192
193
ak8 = vminq_s16(ak8, d16_23);
194
bk8 = vmaxq_s16(bk8, d16_23);
195
196
q0 = vmaxq_s16(q0, vminq_s16(ak8, d8_15));
197
q1 = vminq_s16(q1, vmaxq_s16(bk8, d8_15));
198
199
v1k8 = vextq_s16(d16_23, d24, 1);
200
q0 = vmaxq_s16(q0, vminq_s16(ak8, v1k8));
201
q1 = vminq_s16(q1, vmaxq_s16(bk8, v1k8));
202
203
//fin
204
int16x8_t q = vmaxq_s16(q0, vsubq_s16(vmovq_n_s16(0), q1));
205
int16x4_t q2 = vmax_s16(vget_low_s16(q), vget_high_s16(q));
206
int32x4_t q2w = vmovl_s16(q2);
207
int32x2_t q4 = vmax_s32(vget_low_s32(q2w), vget_high_s32(q2w));
208
int32x2_t q8 = vmax_s32(q4, vreinterpret_s32_s64(vshr_n_s64(vreinterpret_s64_s32(q4), 32)));
209
210
return (u8)(vget_lane_s32(q8, 0) - 1);
211
}
212
213
} //namespace
214
#endif
215
216
void FAST(const Size2D &size,
217
u8 *srcBase, ptrdiff_t srcStride,
218
KeypointStore *keypoints,
219
u8 threshold, bool nonmax_suppression)
220
{
221
internal::assertSupportedConfiguration();
222
#ifdef CAROTENE_NEON
223
//keypoints.clear();
224
225
const s32 K = 8, N = 16 + K + 1;
226
ptrdiff_t i, j, k, pixel[N];
227
makeOffsets(pixel, srcStride);
228
for(k = 16; k < N; k++)
229
pixel[k] = pixel[k - 16];
230
231
uint8x16_t delta = vdupq_n_u8(128);
232
uint8x16_t t = vdupq_n_u8(threshold);
233
uint8x16_t K16 = vdupq_n_u8((u8)K);
234
235
u8 threshold_tab[512];
236
for( i = -255; i <= 255; i++ )
237
threshold_tab[i+255] = (u8)(i < -threshold ? 1 : i > threshold ? 2 : 0);
238
239
std::vector<u8> _buf((size.width+16)*3*(sizeof(ptrdiff_t) + sizeof(u8)) + 128);
240
u8* buf[3];
241
buf[0] = &_buf[0]; buf[1] = buf[0] + size.width; buf[2] = buf[1] + size.width;
242
ptrdiff_t* cpbuf[3];
243
cpbuf[0] = (ptrdiff_t*)internal::alignPtr(buf[2] + size.width, sizeof(ptrdiff_t)) + 1;
244
cpbuf[1] = cpbuf[0] + size.width + 1;
245
cpbuf[2] = cpbuf[1] + size.width + 1;
246
memset(buf[0], 0, size.width*3);
247
248
for(i = 3; i < (ptrdiff_t)size.height-2; i++)
249
{
250
const u8* ptr = internal::getRowPtr(srcBase, srcStride, i) + 3;
251
u8* curr = buf[(i - 3)%3];
252
ptrdiff_t* cornerpos = cpbuf[(i - 3)%3];
253
memset(curr, 0, size.width);
254
ptrdiff_t ncorners = 0;
255
256
if( i < (ptrdiff_t)size.height - 3 )
257
{
258
j = 3;
259
260
for(; j < (ptrdiff_t)size.width - 16 - 3; j += 16, ptr += 16)
261
{
262
internal::prefetch(ptr);
263
internal::prefetch(ptr + pixel[0]);
264
internal::prefetch(ptr + pixel[2]);
265
266
uint8x16_t v0 = vld1q_u8(ptr);
267
int8x16_t v1 = vreinterpretq_s8_u8(veorq_u8(vqsubq_u8(v0, t), delta));
268
int8x16_t v2 = vreinterpretq_s8_u8(veorq_u8(vqaddq_u8(v0, t), delta));
269
270
int8x16_t x0 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[0]), delta));
271
int8x16_t x1 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[4]), delta));
272
int8x16_t x2 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[8]), delta));
273
int8x16_t x3 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[12]), delta));
274
275
uint8x16_t m0 = vandq_u8(vcgtq_s8(x0, v2), vcgtq_s8(x1, v2));
276
uint8x16_t m1 = vandq_u8(vcgtq_s8(v1, x0), vcgtq_s8(v1, x1));
277
m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x1, v2), vcgtq_s8(x2, v2)));
278
m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x1), vcgtq_s8(v1, x2)));
279
m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x2, v2), vcgtq_s8(x3, v2)));
280
m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x2), vcgtq_s8(v1, x3)));
281
m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x3, v2), vcgtq_s8(x0, v2)));
282
m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x3), vcgtq_s8(v1, x0)));
283
m0 = vorrq_u8(m0, m1);
284
285
u64 mask[2];
286
vst1q_u64(mask, vreinterpretq_u64_u8(m0));
287
288
if( mask[0] == 0 )
289
{
290
if (mask[1] != 0)
291
{
292
j -= 8;
293
ptr -= 8;
294
}
295
continue;
296
}
297
298
uint8x16_t c0 = vmovq_n_u8(0);
299
uint8x16_t c1 = vmovq_n_u8(0);
300
uint8x16_t max0 = vmovq_n_u8(0);
301
uint8x16_t max1 = vmovq_n_u8(0);
302
for( k = 0; k < N; k++ )
303
{
304
int8x16_t x = vreinterpretq_s8_u8(veorq_u8(vld1q_u8(ptr + pixel[k]), delta));
305
m0 = vcgtq_s8(x, v2);
306
m1 = vcgtq_s8(v1, x);
307
308
c0 = vandq_u8(vsubq_u8(c0, m0), m0);
309
c1 = vandq_u8(vsubq_u8(c1, m1), m1);
310
311
max0 = vmaxq_u8(max0, c0);
312
max1 = vmaxq_u8(max1, c1);
313
}
314
315
max0 = vmaxq_u8(max0, max1);
316
u8 m[16];
317
vst1q_u8(m, vcgtq_u8(max0, K16));
318
319
for( k = 0; k < 16; ++k )
320
if(m[k])
321
{
322
cornerpos[ncorners++] = j+k;
323
if(nonmax_suppression)
324
curr[j+k] = cornerScore(ptr+k, pixel);
325
}
326
}
327
328
for( ; j < (s32)size.width - 3; j++, ptr++ )
329
{
330
s32 v = ptr[0];
331
const u8* tab = &threshold_tab[0] - v + 255;
332
s32 d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];
333
334
if( d == 0 )
335
continue;
336
337
d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
338
d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
339
d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];
340
341
if( d == 0 )
342
continue;
343
344
d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
345
d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
346
d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
347
d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];
348
349
if( d & 1 )
350
{
351
s32 vt = v - threshold, count = 0;
352
353
for( k = 0; k < N; k++ )
354
{
355
s32 x = ptr[pixel[k]];
356
if(x < vt)
357
{
358
if( ++count > K )
359
{
360
cornerpos[ncorners++] = j;
361
if(nonmax_suppression)
362
curr[j] = cornerScore(ptr, pixel);
363
break;
364
}
365
}
366
else
367
count = 0;
368
}
369
}
370
371
if( d & 2 )
372
{
373
s32 vt = v + threshold, count = 0;
374
375
for( k = 0; k < N; k++ )
376
{
377
s32 x = ptr[pixel[k]];
378
if(x > vt)
379
{
380
if( ++count > K )
381
{
382
cornerpos[ncorners++] = j;
383
if(nonmax_suppression)
384
curr[j] = cornerScore(ptr, pixel);
385
break;
386
}
387
}
388
else
389
count = 0;
390
}
391
}
392
}
393
}
394
395
cornerpos[-1] = ncorners;
396
397
if( i == 3 )
398
continue;
399
400
const u8* prev = buf[(i - 4 + 3)%3];
401
const u8* pprev = buf[(i - 5 + 3)%3];
402
cornerpos = cpbuf[(i - 4 + 3)%3];
403
ncorners = cornerpos[-1];
404
405
for( k = 0; k < ncorners; k++ )
406
{
407
j = cornerpos[k];
408
s32 score = prev[j];
409
if( !nonmax_suppression ||
410
(score > prev[j+1] && score > prev[j-1] &&
411
score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
412
score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
413
{
414
keypoints->push((f32)j, (f32)(i-1), 7.f, -1, (f32)score);
415
}
416
}
417
}
418
#else
419
(void)size;
420
(void)srcBase;
421
(void)srcStride;
422
(void)keypoints;
423
(void)threshold;
424
(void)nonmax_suppression;
425
#endif
426
}
427
428
} // namespace CAROTENE_NS
429
430