Path: blob/master/modules/features2d/src/opencl/orb.cl
16339 views
// OpenCL port of the ORB feature detector and descriptor extractor1// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org2//3// The original code has been contributed by Peter Andreas Entschev, peter@entschev.com45#define LAYERINFO_SIZE 16#define LAYERINFO_OFS 07#define KEYPOINT_SIZE 38#define ORIENTED_KEYPOINT_SIZE 49#define KEYPOINT_X 010#define KEYPOINT_Y 111#define KEYPOINT_Z 212#define KEYPOINT_ANGLE 31314/////////////////////////////////////////////////////////////1516#ifdef ORB_RESPONSES1718__kernel void19ORB_HarrisResponses(__global const uchar* imgbuf, int imgstep, int imgoffset0,20__global const int* layerinfo, __global const int* keypoints,21__global float* responses, int nkeypoints )22{23int idx = get_global_id(0);24if( idx < nkeypoints )25{26__global const int* kpt = keypoints + idx*KEYPOINT_SIZE;27__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;28__global const uchar* img = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +29(kpt[KEYPOINT_Y] - blockSize/2)*imgstep + (kpt[KEYPOINT_X] - blockSize/2);3031int i, j;32int a = 0, b = 0, c = 0;33for( i = 0; i < blockSize; i++, img += imgstep-blockSize )34{35for( j = 0; j < blockSize; j++, img++ )36{37int Ix = (img[1] - img[-1])*2 + img[-imgstep+1] - img[-imgstep-1] + img[imgstep+1] - img[imgstep-1];38int Iy = (img[imgstep] - img[-imgstep])*2 + img[imgstep-1] - img[-imgstep-1] + img[imgstep+1] - img[-imgstep+1];39a += Ix*Ix;40b += Iy*Iy;41c += Ix*Iy;42}43}44responses[idx] = ((float)a * b - (float)c * c - HARRIS_K * (float)(a + b) * (a + b))*scale_sq_sq;45}46}4748#endif4950/////////////////////////////////////////////////////////////5152#ifdef ORB_ANGLES5354#define _DBL_EPSILON 2.2204460492503131e-16f55#define atan2_p1 (0.9997878412794807f*57.29577951308232f)56#define atan2_p3 (-0.3258083974640975f*57.29577951308232f)57#define atan2_p5 (0.1555786518463281f*57.29577951308232f)58#define atan2_p7 (-0.04432655554792128f*57.29577951308232f)5960inline float fastAtan2( float y, float x )61{62float ax = fabs(x), ay = fabs(y);63float a, c, c2;64if( ax >= ay )65{66c = ay/(ax + _DBL_EPSILON);67c2 = c*c;68a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;69}70else71{72c = ax/(ay + _DBL_EPSILON);73c2 = c*c;74a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;75}76if( x < 0 )77a = 180.f - a;78if( y < 0 )79a = 360.f - a;80return a;81}828384__kernel void85ORB_ICAngle(__global const uchar* imgbuf, int imgstep, int imgoffset0,86__global const int* layerinfo, __global const int* keypoints,87__global float* responses, const __global int* u_max,88int nkeypoints, int half_k )89{90int idx = get_global_id(0);91if( idx < nkeypoints )92{93__global const int* kpt = keypoints + idx*KEYPOINT_SIZE;9495__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;96__global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +97kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];9899int u, v, m_01 = 0, m_10 = 0;100101// Treat the center line differently, v=0102for( u = -half_k; u <= half_k; u++ )103m_10 += u * center[u];104105// Go line by line in the circular patch106for( v = 1; v <= half_k; v++ )107{108// Proceed over the two lines109int v_sum = 0;110int d = u_max[v];111for( u = -d; u <= d; u++ )112{113int val_plus = center[u + v*imgstep], val_minus = center[u - v*imgstep];114v_sum += (val_plus - val_minus);115m_10 += u * (val_plus + val_minus);116}117m_01 += v * v_sum;118}119120// we do not use OpenCL's atan2 intrinsic,121// because we want to get _exactly_ the same results as the CPU version122responses[idx] = fastAtan2((float)m_01, (float)m_10);123}124}125126#endif127128/////////////////////////////////////////////////////////////129130#ifdef ORB_DESCRIPTORS131132__kernel void133ORB_computeDescriptor(__global const uchar* imgbuf, int imgstep, int imgoffset0,134__global const int* layerinfo, __global const int* keypoints,135__global uchar* _desc, const __global int* pattern,136int nkeypoints, int dsize )137{138int idx = get_global_id(0);139if( idx < nkeypoints )140{141int i;142__global const int* kpt = keypoints + idx*ORIENTED_KEYPOINT_SIZE;143144__global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;145__global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +146kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];147float angle = as_float(kpt[KEYPOINT_ANGLE]);148angle *= 0.01745329251994329547f;149150float cosa;151float sina = sincos(angle, &cosa);152153__global uchar* desc = _desc + idx*dsize;154155#define GET_VALUE(idx) \156center[mad24(convert_int_rte(pattern[(idx)*2] * sina + pattern[(idx)*2+1] * cosa), imgstep, \157convert_int_rte(pattern[(idx)*2] * cosa - pattern[(idx)*2+1] * sina))]158159for( i = 0; i < dsize; i++ )160{161int val;162#if WTA_K == 2163int t0, t1;164165t0 = GET_VALUE(0); t1 = GET_VALUE(1);166val = t0 < t1;167168t0 = GET_VALUE(2); t1 = GET_VALUE(3);169val |= (t0 < t1) << 1;170171t0 = GET_VALUE(4); t1 = GET_VALUE(5);172val |= (t0 < t1) << 2;173174t0 = GET_VALUE(6); t1 = GET_VALUE(7);175val |= (t0 < t1) << 3;176177t0 = GET_VALUE(8); t1 = GET_VALUE(9);178val |= (t0 < t1) << 4;179180t0 = GET_VALUE(10); t1 = GET_VALUE(11);181val |= (t0 < t1) << 5;182183t0 = GET_VALUE(12); t1 = GET_VALUE(13);184val |= (t0 < t1) << 6;185186t0 = GET_VALUE(14); t1 = GET_VALUE(15);187val |= (t0 < t1) << 7;188189pattern += 16*2;190191#elif WTA_K == 3192int t0, t1, t2;193194t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);195val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);196197t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);198val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;199200t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);201val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;202203t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);204val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;205206pattern += 12*2;207208#elif WTA_K == 4209int t0, t1, t2, t3, k;210int a, b;211212t0 = GET_VALUE(0); t1 = GET_VALUE(1);213t2 = GET_VALUE(2); t3 = GET_VALUE(3);214a = 0, b = 2;215if( t1 > t0 ) t0 = t1, a = 1;216if( t3 > t2 ) t2 = t3, b = 3;217k = t0 > t2 ? a : b;218val = k;219220t0 = GET_VALUE(4); t1 = GET_VALUE(5);221t2 = GET_VALUE(6); t3 = GET_VALUE(7);222a = 0, b = 2;223if( t1 > t0 ) t0 = t1, a = 1;224if( t3 > t2 ) t2 = t3, b = 3;225k = t0 > t2 ? a : b;226val |= k << 2;227228t0 = GET_VALUE(8); t1 = GET_VALUE(9);229t2 = GET_VALUE(10); t3 = GET_VALUE(11);230a = 0, b = 2;231if( t1 > t0 ) t0 = t1, a = 1;232if( t3 > t2 ) t2 = t3, b = 3;233k = t0 > t2 ? a : b;234val |= k << 4;235236t0 = GET_VALUE(12); t1 = GET_VALUE(13);237t2 = GET_VALUE(14); t3 = GET_VALUE(15);238a = 0, b = 2;239if( t1 > t0 ) t0 = t1, a = 1;240if( t3 > t2 ) t2 = t3, b = 3;241k = t0 > t2 ? a : b;242val |= k << 6;243244pattern += 16*2;245#else246#error "unknown/undefined WTA_K value; should be 2, 3 or 4"247#endif248desc[i] = (uchar)val;249}250}251}252253#endif254255256