// Path: blob/master/modules/features2d/src/opencl/fast.cl
// (web viewer metadata: 16339 views)
// OpenCL port of the FAST corner detector.1// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org23inline int cornerScore(__global const uchar* img, int step)4{5int k, tofs, v = img[0], a0 = 0, b0;6int d[16];7#define LOAD2(idx, ofs) \8tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs])9LOAD2(0, 3);10LOAD2(1, -step+3);11LOAD2(2, -step*2+2);12LOAD2(3, -step*3+1);13LOAD2(4, -step*3);14LOAD2(5, -step*3-1);15LOAD2(6, -step*2-2);16LOAD2(7, -step-3);1718#pragma unroll19for( k = 0; k < 16; k += 2 )20{21int a = min((int)d[(k+1)&15], (int)d[(k+2)&15]);22a = min(a, (int)d[(k+3)&15]);23a = min(a, (int)d[(k+4)&15]);24a = min(a, (int)d[(k+5)&15]);25a = min(a, (int)d[(k+6)&15]);26a = min(a, (int)d[(k+7)&15]);27a = min(a, (int)d[(k+8)&15]);28a0 = max(a0, min(a, (int)d[k&15]));29a0 = max(a0, min(a, (int)d[(k+9)&15]));30}3132b0 = -a0;33#pragma unroll34for( k = 0; k < 16; k += 2 )35{36int b = max((int)d[(k+1)&15], (int)d[(k+2)&15]);37b = max(b, (int)d[(k+3)&15]);38b = max(b, (int)d[(k+4)&15]);39b = max(b, (int)d[(k+5)&15]);40b = max(b, (int)d[(k+6)&15]);41b = max(b, (int)d[(k+7)&15]);42b = max(b, (int)d[(k+8)&15]);4344b0 = min(b0, max(b, (int)d[k]));45b0 = min(b0, max(b, (int)d[(k+9)&15]));46}4748return -b0-1;49}5051__kernel52void FAST_findKeypoints(53__global const uchar * _img, int step, int img_offset,54int img_rows, int img_cols,55volatile __global int* kp_loc,56int max_keypoints, int threshold )57{58int j = get_global_id(0) + 3;59int i = get_global_id(1) + 3;6061if (i < img_rows - 3 && j < img_cols - 3)62{63__global const uchar* img = _img + mad24(i, step, j + img_offset);64int v = img[0], t0 = v - threshold, t1 = v + threshold;65int k, tofs, v0, v1;66int m0 = 0, m1 = 0;6768#define UPDATE_MASK(idx, ofs) \69tofs = ofs; v0 = img[tofs]; v1 = img[-tofs]; \70m0 |= ((v0 < t0) << idx) | ((v1 < t0) << (8 + idx)); \71m1 |= ((v0 > t1) << idx) | ((v1 > t1) << (8 + idx))7273UPDATE_MASK(0, 3);74if( (m0 | m1) == 0 )75return;7677UPDATE_MASK(2, 
-step*2+2);78UPDATE_MASK(4, -step*3);79UPDATE_MASK(6, -step*2-2);8081#define EVEN_MASK (1+4+16+64)8283if( ((m0 | (m0 >> 8)) & EVEN_MASK) != EVEN_MASK &&84((m1 | (m1 >> 8)) & EVEN_MASK) != EVEN_MASK )85return;8687UPDATE_MASK(1, -step+3);88UPDATE_MASK(3, -step*3+1);89UPDATE_MASK(5, -step*3-1);90UPDATE_MASK(7, -step-3);91if( ((m0 | (m0 >> 8)) & 255) != 255 &&92((m1 | (m1 >> 8)) & 255) != 255 )93return;9495m0 |= m0 << 16;96m1 |= m1 << 16;9798#define CHECK0(i) ((m0 & (511 << i)) == (511 << i))99#define CHECK1(i) ((m1 & (511 << i)) == (511 << i))100101if( CHECK0(0) + CHECK0(1) + CHECK0(2) + CHECK0(3) +102CHECK0(4) + CHECK0(5) + CHECK0(6) + CHECK0(7) +103CHECK0(8) + CHECK0(9) + CHECK0(10) + CHECK0(11) +104CHECK0(12) + CHECK0(13) + CHECK0(14) + CHECK0(15) +105106CHECK1(0) + CHECK1(1) + CHECK1(2) + CHECK1(3) +107CHECK1(4) + CHECK1(5) + CHECK1(6) + CHECK1(7) +108CHECK1(8) + CHECK1(9) + CHECK1(10) + CHECK1(11) +109CHECK1(12) + CHECK1(13) + CHECK1(14) + CHECK1(15) == 0 )110return;111112{113int idx = atomic_inc(kp_loc);114if( idx < max_keypoints )115{116kp_loc[1 + 2*idx] = j;117kp_loc[2 + 2*idx] = i;118}119}120}121}122123///////////////////////////////////////////////////////////////////////////124// nonmaxSupression125126__kernel127void FAST_nonmaxSupression(128__global const int* kp_in, volatile __global int* kp_out,129__global const uchar * _img, int step, int img_offset,130int rows, int cols, int counter, int max_keypoints)131{132const int idx = get_global_id(0);133134if (idx < counter)135{136int x = kp_in[1 + 2*idx];137int y = kp_in[2 + 2*idx];138__global const uchar* img = _img + mad24(y, step, x + img_offset);139140int s = cornerScore(img, step);141142if( (x < 4 || s > cornerScore(img-1, step)) +143(y < 4 || s > cornerScore(img-step, step)) != 2 )144return;145if( (x >= cols - 4 || s > cornerScore(img+1, step)) +146(y >= rows - 4 || s > cornerScore(img+step, step)) +147(x < 4 || y < 4 || s > cornerScore(img-step-1, step)) +148(x >= cols - 4 || y < 4 || s > 
cornerScore(img-step+1, step)) +149(x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) +150(x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6)151{152int new_idx = atomic_inc(kp_out);153if( new_idx < max_keypoints )154{155kp_out[1 + 3*new_idx] = x;156kp_out[2 + 3*new_idx] = y;157kp_out[3 + 3*new_idx] = s;158}159}160}161}162163164