// Source: GitHub repository Tetragramm/opencv,
//         path blob/master/modules/features2d/src/opencl/fast.cl
// OpenCL port of the FAST corner detector.
// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org

// Computes the corner score of the pixel at img[0].
//
// The 16 pixels examined lie on the ring around img[0] given by the offsets
// below (3 pixels right, then counter-clockwise in image coordinates);
// ring pixel i+8 is diametrically opposite ring pixel i.
//
// img  - pointer to the centre pixel; all 16 ring offsets (up to 3 rows and
//        3 columns away) are read, so the caller must keep them in bounds.
// step - distance, in uchar elements, between vertically adjacent pixels.
//
// Returns T - 1, where T is the largest margin such that some run of 9
// consecutive ring pixels is entirely at least T darker, or entirely at
// least T brighter, than the centre (T is 0 when no run qualifies, giving -1).
// Because FAST_findKeypoints compares with strict inequalities, this is the
// largest threshold at which the pixel would still be detected.
inline int cornerScore(__global const uchar* img, int step)
{
    int k, tofs, v = img[0], a0 = 0, b0;
    int d[16];

    // d[i] = centre - ring pixel i, so d > 0 means the ring pixel is darker.
    // Each LOAD2 fills one ring pixel and its opposite (negated offset).
    // The differences lie in [-255, 255], so no narrowing cast is needed.
#define LOAD2(idx, ofs) \
    tofs = (ofs); d[(idx)] = v - img[tofs]; d[(idx) + 8] = v - img[-tofs]
    LOAD2(0, 3);
    LOAD2(1, -step+3);
    LOAD2(2, -step*2+2);
    LOAD2(3, -step*3+1);
    LOAD2(4, -step*3);
    LOAD2(5, -step*3-1);
    LOAD2(6, -step*2-2);
    LOAD2(7, -step-3);
#undef LOAD2

    // Darker-ring case: a0 = max over all 16 runs of 9 consecutive ring
    // pixels of the minimum difference in the run (never below 0).
    // For each even k the 8 shared pixels k+1..k+8 are reduced once and then
    // extended by pixel k (run k..k+8) and by pixel k+9 (run k+1..k+9).
    #pragma unroll
    for( k = 0; k < 16; k += 2 )
    {
        int a = min(d[(k+1)&15], d[(k+2)&15]);
        a = min(a, d[(k+3)&15]);
        a = min(a, d[(k+4)&15]);
        a = min(a, d[(k+5)&15]);
        a = min(a, d[(k+6)&15]);
        a = min(a, d[(k+7)&15]);
        a = min(a, d[(k+8)&15]);
        a0 = max(a0, min(a, d[k&15]));
        a0 = max(a0, min(a, d[(k+9)&15]));
    }

    // Brighter-ring case, mirrored: b0 = min over all runs of the maximum
    // difference in the run. Seeding with -a0 makes -b0 the larger of the
    // two margins.
    b0 = -a0;
    #pragma unroll
    for( k = 0; k < 16; k += 2 )
    {
        int b = max(d[(k+1)&15], d[(k+2)&15]);
        b = max(b, d[(k+3)&15]);
        b = max(b, d[(k+4)&15]);
        b = max(b, d[(k+5)&15]);
        b = max(b, d[(k+6)&15]);
        b = max(b, d[(k+7)&15]);
        b = max(b, d[(k+8)&15]);

        b0 = min(b0, max(b, d[k&15]));
        b0 = min(b0, max(b, d[(k+9)&15]));
    }

    return -b0-1;
}

///////////////////////////////////////////////////////////////////////////
// FAST_findKeypoints

// Detects FAST corner candidates: one work-item per pixel.
//
// A pixel is reported when at least 9 consecutive pixels of its 16-pixel ring
// are all darker than (centre - threshold) or all brighter than
// (centre + threshold).
//
// _img, step, img_offset - image bytes; pixel (row i, column j) is read at
//                          _img[i*step + j + img_offset].
// img_rows, img_cols     - image size; a 3-pixel border is never tested
//                          because the ring reaches 3 pixels in each direction.
// kp_loc                 - output: kp_loc[0] is an atomically incremented
//                          count of ALL detections (it keeps growing past
//                          max_keypoints); detection n < max_keypoints is
//                          stored as x = kp_loc[1 + 2*n], y = kp_loc[2 + 2*n].
//                          The code never resets kp_loc[0].
// max_keypoints          - number of (x, y) slots available in kp_loc.
// threshold              - intensity margin used in the strict comparisons.
__kernel
void FAST_findKeypoints(
    __global const uchar * _img, int step, int img_offset,
    int img_rows, int img_cols,
    volatile __global int* kp_loc,
    int max_keypoints, int threshold )
{
    // Global ids are shifted by 3 so that id (0, 0) maps to the first pixel
    // whose whole ring lies inside the image.
    int j = get_global_id(0) + 3;
    int i = get_global_id(1) + 3;

    if (i < img_rows - 3 && j < img_cols - 3)
    {
        __global const uchar* img = _img + mad24(i, step, j + img_offset);
        int v = img[0], t0 = v - threshold, t1 = v + threshold;
        int tofs, v0, v1;
        // Bit n of m0 (m1) is set when ring pixel n is darker than t0
        // (brighter than t1); n ranges over 0..15.
        int m0 = 0, m1 = 0;

        // Classifies ring pixel idx (at offset ofs) and the opposite ring
        // pixel idx+8 (at offset -ofs), recording the results in m0/m1.
#define UPDATE_MASK(idx, ofs) \
        tofs = (ofs); v0 = img[tofs]; v1 = img[-tofs]; \
        m0 |= ((v0 < t0) << (idx)) | ((v1 < t0) << (8 + (idx))); \
        m1 |= ((v0 > t1) << (idx)) | ((v1 > t1) << (8 + (idx)))

        // Any 9 consecutive ring pixels contain at least one pixel of every
        // opposite pair, so if neither pixel 0 nor pixel 8 is darker or
        // brighter, no qualifying run can exist.
        UPDATE_MASK(0, 3);
        if( (m0 | m1) == 0 )
            return;

        UPDATE_MASK(2, -step*2+2);
        UPDATE_MASK(4, -step*3);
        UPDATE_MASK(6, -step*2-2);

        // Bits 0, 2, 4 and 6: the even-numbered opposite pairs tested so far.
#define EVEN_MASK (1+4+16+64)

        // Same argument, applied to one polarity at a time: a darker
        // (brighter) run needs a darker (brighter) pixel in each even pair.
        if( ((m0 | (m0 >> 8)) & EVEN_MASK) != EVEN_MASK &&
            ((m1 | (m1 >> 8)) & EVEN_MASK) != EVEN_MASK )
            return;

        UPDATE_MASK(1, -step+3);
        UPDATE_MASK(3, -step*3+1);
        UPDATE_MASK(5, -step*3-1);
        UPDATE_MASK(7, -step-3);
        // ... and finally in all eight opposite pairs.
        if( ((m0 | (m0 >> 8)) & 255) != 255 &&
            ((m1 | (m1 >> 8)) & 255) != 255 )
            return;

        // Duplicate the 16 ring bits into bits 16..31 so that a 9-bit window
        // starting at any bit 0..15 wraps around the ring.
        m0 |= m0 << 16;
        m1 |= m1 << 16;

        // CHECKn(s) is 1 when the 9 consecutive ring pixels starting at s
        // are all darker (CHECK0) / all brighter (CHECK1).
#define CHECK0(s) ((m0 & (511 << (s))) == (511 << (s)))
#define CHECK1(s) ((m1 & (511 << (s))) == (511 << (s)))

        if( CHECK0(0) + CHECK0(1) + CHECK0(2) + CHECK0(3) +
            CHECK0(4) + CHECK0(5) + CHECK0(6) + CHECK0(7) +
            CHECK0(8) + CHECK0(9) + CHECK0(10) + CHECK0(11) +
            CHECK0(12) + CHECK0(13) + CHECK0(14) + CHECK0(15) +

            CHECK1(0) + CHECK1(1) + CHECK1(2) + CHECK1(3) +
            CHECK1(4) + CHECK1(5) + CHECK1(6) + CHECK1(7) +
            CHECK1(8) + CHECK1(9) + CHECK1(10) + CHECK1(11) +
            CHECK1(12) + CHECK1(13) + CHECK1(14) + CHECK1(15) == 0 )
            return;

#undef CHECK1
#undef CHECK0
#undef EVEN_MASK
#undef UPDATE_MASK

        {
            // Reserve a slot; the counter is bumped even when the slot index
            // is out of range, only the store is skipped.
            int idx = atomic_inc(kp_loc);
            if( idx < max_keypoints )
            {
                kp_loc[1 + 2*idx] = j;
                kp_loc[2 + 2*idx] = i;
            }
        }
    }
}

///////////////////////////////////////////////////////////////////////////
// nonmaxSupression: non-maximum suppression of the detected keypoints

// Non-maximum suppression: one work-item per candidate keypoint.
//
// A candidate is kept only when its corner score is strictly greater than
// the score of each of its 8 neighbours. A neighbour is skipped (treated as
// passing) when the candidate has x < 4, y < 4, x >= cols - 4 or
// y >= rows - 4 on the corresponding side.
//
// kp_in                  - candidates: x = kp_in[1 + 2*n], y = kp_in[2 + 2*n].
// kp_out                 - output: kp_out[0] is an atomically incremented
//                          count of ALL surviving keypoints (it keeps growing
//                          past max_keypoints); survivor m < max_keypoints is
//                          stored as x, y, score at kp_out[1 + 3*m .. 3 + 3*m].
//                          The code never resets kp_out[0].
// _img, step, img_offset - image bytes; pixel (row y, column x) is read at
//                          _img[y*step + x + img_offset].
// rows, cols             - image size used for the border guards.
// counter                - number of candidates in kp_in to process.
// max_keypoints          - number of (x, y, score) slots available in kp_out.
__kernel
void FAST_nonmaxSupression(
    __global const int* kp_in, volatile __global int* kp_out,
    __global const uchar * _img, int step, int img_offset,
    int rows, int cols, int counter, int max_keypoints)
{
    const int idx = get_global_id(0);

    if (idx >= counter)
        return;

    int x = kp_in[1 + 2*idx];
    int y = kp_in[2 + 2*idx];
    __global const uchar* img = _img + mad24(y, step, x + img_offset);

    int s = cornerScore(img, step);

    // In every term the border guard comes first: the short-circuiting ||
    // prevents cornerScore from being evaluated for a neighbour that the
    // guard excludes.

    // Left and upper neighbours first, as a cheap early rejection.
    if( (x < 4 || s > cornerScore(img-1, step)) +
        (y < 4 || s > cornerScore(img-step, step)) != 2 )
        return;

    // Remaining six neighbours: right, lower, and the four diagonals.
    if( (x >= cols - 4 || s > cornerScore(img+1, step)) +
        (y >= rows - 4 || s > cornerScore(img+step, step)) +
        (x < 4 || y < 4 || s > cornerScore(img-step-1, step)) +
        (x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) +
        (x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) +
        (x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6)
    {
        // Reserve a slot; the counter is bumped even when the slot index is
        // out of range, only the store is skipped.
        int new_idx = atomic_inc(kp_out);
        if( new_idx < max_keypoints )
        {
            kp_out[1 + 3*new_idx] = x;
            kp_out[2 + 3*new_idx] = y;
            kp_out[3 + 3*new_idx] = s;
        }
    }
}