#include <vector>
#include "common.hpp"
namespace CAROTENE_NS {
/*
 * Tells whether the u8 -> s16 Sobel3x3 overload accepts a configuration.
 *
 * A configuration is accepted when:
 *   - dx and dy are each in the range [0, 2],
 *   - at least one of them is non-zero,
 *   - isSeparableFilter3x3Supported() accepts the same size / border /
 *     dx / dy / margin combination.
 *
 * The separable-filter check is only consulted once the derivative orders
 * have been validated.
 */
bool isSobel3x3Supported(const Size2D &size, BORDER_MODE border,
                         s32 dx, s32 dy, Margin borderMargin)
{
    // Horizontal derivative order out of range.
    if (!(dx < 3) || !(dx >= 0))
        return false;

    // Vertical derivative order out of range.
    if (!(dy < 3) || !(dy >= 0))
        return false;

    // dx == dy == 0 is rejected: at least one derivative must be requested.
    if (!((dx + dy) > 0))
        return false;

    // Remaining constraints are delegated to the 3x3 separable filter check.
    return isSeparableFilter3x3Supported(size, border, dx, dy, borderMargin);
}
/*
 * 3x3 Sobel operator, u8 source -> s16 destination.
 *
 * size                - image dimensions
 * srcBase, srcStride  - source image and its row stride
 * dstBase, dstStride  - destination image and its row stride
 * dx, dy              - derivative orders, validated by isSobel3x3Supported()
 * borderType,
 * borderValue,
 * borderMargin        - border handling, forwarded unchanged
 *
 * The configuration is asserted first. In NEON builds the work is forwarded
 * to SeparableFilter3x3(); in other builds the function body is empty apart
 * from silencing unused-parameter warnings.
 */
void Sobel3x3(const Size2D &size,
              const u8 * srcBase, ptrdiff_t srcStride,
              s16 * dstBase, ptrdiff_t dstStride,
              s32 dx, s32 dy,
              BORDER_MODE borderType, u8 borderValue, Margin borderMargin)
{
    const bool configurationOk =
        isSobel3x3Supported(size, borderType, dx, dy, borderMargin);
    internal::assertSupportedConfiguration(configurationOk);

#ifdef CAROTENE_NEON
    // dx / dy are passed straight through as the row / column selectors.
    // NOTE(review): the two literal zeros are presumably the "no custom
    // weights" arguments of SeparableFilter3x3 - confirm against its declaration.
    SeparableFilter3x3(size,
                       srcBase, srcStride,
                       dstBase, dstStride,
                       dx, dy,
                       0, 0,
                       borderType, borderValue, borderMargin);
#else
    // No NEON: nothing is computed, just mark the parameters as used.
    (void)srcBase;
    (void)srcStride;
    (void)dstBase;
    (void)dstStride;
    (void)borderValue;
#endif
}
/*
 * Tells whether the f32 -> f32 Sobel3x3 overload accepts a configuration.
 *
 * Requirements, checked in this order:
 *   - isSupportedConfiguration() is true,
 *   - dx and dy are each in [0, 2] and not both zero,
 *   - the image is at least 4 pixels wide and 2 pixels high,
 *   - the border mode is CONSTANT, REFLECT, REFLECT101 or REPLICATE.
 */
bool isSobel3x3f32Supported(const Size2D &size, BORDER_MODE border,
                            s32 dx, s32 dy)
{
    if (!isSupportedConfiguration())
        return false;

    // Derivative orders: each within [0, 2] ...
    if (dx >= 3 || dx < 0 || dy >= 3 || dy < 0)
        return false;
    // ... and at least one of them requested.
    if ((dx + dy) <= 0)
        return false;

    // Minimum image extent accepted by the f32 implementation.
    if (size.width < 4 || size.height < 2)
        return false;

    switch (border)
    {
    case BORDER_MODE_CONSTANT:
    case BORDER_MODE_REFLECT:
    case BORDER_MODE_REFLECT101:
    case BORDER_MODE_REPLICATE:
        return true;
    default:
        return false;
    }
}
#ifdef CAROTENE_NEON
namespace {

/*
 * Vertical pass of the separable 3x3 Sobel kernel for one column.
 *
 * srow0 / srow1 / srow2 are the rows above / at / below the current row and
 * col is the column to sample. The kernel is picked by dy:
 *   dy == 0 : [1  2 1]  (smoothing)
 *   dy == 2 : [1 -2 1]  (second derivative)
 *   else    : [-1 0 1]  (first derivative)
 */
inline f32 sobelVerticalTapF32(const f32 * srow0, const f32 * srow1, const f32 * srow2,
                               size_t col, s32 dy)
{
    if (!dy)
        return srow2[col] + 2*srow1[col] + srow0[col];
    if (dy == 2)
        return srow2[col] - 2*srow1[col] + srow0[col];
    return srow2[col] - srow0[col];
}

} // namespace
#endif

/*
 * 3x3 Sobel operator, f32 source -> f32 destination.
 *
 * size                - image dimensions (width >= 4, height >= 2 is asserted)
 * srcBase, srcStride  - source image and its row stride
 * dstBase, dstStride  - destination image and its row stride
 * dx, dy              - derivative orders, each in [0, 2], not both zero
 * borderType          - CONSTANT, REFLECT, REFLECT101 or REPLICATE
 * borderValue         - pixel value outside the image for BORDER_MODE_CONSTANT
 *
 * The configuration is asserted via isSobel3x3f32Supported(). Without
 * CAROTENE_NEON the body does nothing beyond that assertion.
 *
 * Implementation outline (NEON build), per source row y:
 *   1. pick the rows above/below according to the border mode;
 *   2. NEON loop: vertical kernel on 4 columns at a time, then horizontal
 *      kernel on the previous 4 columns, result stored in a temporary row;
 *   3. scalar loop for the remaining right-hand columns (including the
 *      right border);
 *   4. the temporary row of the PREVIOUS source row is copied to the
 *      destination; the last row is flushed at the end. Two temporary rows
 *      are used alternately for that purpose.
 *      NOTE(review): the one-row delay presumably allows dst to alias src -
 *      confirm with callers.
 *
 * On rows with y + 2 < height the NEON loop runs through x == width and thus
 * loads up to 3 floats past the end of the three source rows involved; the
 * lanes loaded there never reach the output (the scalar loop recomputes the
 * last column), but the memory has to be readable.
 */
void Sobel3x3(const Size2D &size,
              const f32 * srcBase, ptrdiff_t srcStride,
              f32 * dstBase, ptrdiff_t dstStride,
              s32 dx, s32 dy,
              BORDER_MODE borderType, f32 borderValue)
{
    internal::assertSupportedConfiguration(isSobel3x3f32Supported(size, borderType, dx, dy));
#ifdef CAROTENE_NEON
    // Synthetic row used above row 0 / below the last row for constant borders.
    std::vector<f32> _tmp;
    f32 *tmp = 0;
    if (borderType == BORDER_MODE_CONSTANT)
    {
        // tmp starts at _tmp[1]; the NEON loop may load tmp[x .. x+3] with
        // x up to size.width, so tmp[0 .. width+3] must be valid, i.e.
        // width + 5 elements in total. (width + 2 was too small and caused
        // an out-of-bounds read on row 0.)
        _tmp.assign(size.width + 5, borderValue);
        tmp = &_tmp[1];
    }

    // Result of the vertical kernel applied to a column made entirely of
    // borderValue: 1+2+1 for the smoothing kernel, 0 for both derivatives.
    // Used for the left and right constant borders in BOTH code paths.
    const f32 constBorderColumn = !dy ? 4*borderValue : 0.f;

    // Two 32-byte aligned temporary rows carved out of one allocation.
    ptrdiff_t delta = (ptrdiff_t)((size.width + 2 + 31) & -32); // padded row length
    std::vector<f32> _tempBuf((delta << 1) + 64);
    f32 *trow0 = internal::alignPtr(&_tempBuf[1], 32), *trow1 = internal::alignPtr(trow0 + delta, 32);

    for( size_t y = 0; y < size.height; y++ )
    {
        const f32* srow0;
        const f32* srow1 = internal::getRowPtr(srcBase, srcStride, y);
        const f32* srow2;
        f32* drow = internal::getRowPtr(dstBase, dstStride, y > 0 ? y-1 : 0); // row y-1 (delayed output)
        f32* drow1 = internal::getRowPtr(dstBase, dstStride, y);              // row y (final flush only)

        if (borderType == BORDER_MODE_REFLECT101) {
            srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 1);
            srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-2);
        } else if (borderType == BORDER_MODE_CONSTANT) {
            srow0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
            srow2 = y < size.height-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
        } else { // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE: repeat the edge row
            srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
            srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-1);
        }

        // Even rows are written to trow0, odd rows to trow1; the other
        // buffer still holds the result of the previous row.
        f32 * const trowCur = (y%2) ? trow1 : trow0;
        const f32 * const trowPrev = (y%2) ? trow0 : trow1;

        // Sliding window of vertically filtered columns:
        // tprev = [x-8, x-5], tcurr = [x-4, x-1], tnext = [x, x+3].
        float32x4_t tprev = vmovq_n_f32(0.f);
        float32x4_t tcurr = vmovq_n_f32(0.f);
        float32x4_t tnext = vmovq_n_f32(0.f);
        float32x4_t t0, t1, t2;

        // On the last two rows the loop stops early so that no load goes
        // past the end of the image.
        size_t x = 0, bcolsn = y + 2 < size.height ? size.width : (size.width - 4);
        for( ; x <= bcolsn; x += 4 )
        {
            internal::prefetch(srow0 + x);
            internal::prefetch(srow1 + x);
            internal::prefetch(srow2 + x);

            float32x4_t x0 = vld1q_f32(srow0 + x);
            float32x4_t x1 = vld1q_f32(srow1 + x);
            float32x4_t x2 = vld1q_f32(srow2 + x);

            tprev = tcurr;
            tcurr = tnext;

            // Vertical kernel.
            if(!dy)
            {
                tnext = vaddq_f32(vaddq_f32(vaddq_f32(x1, x1), x2), x0);
            }
            else if(dy == 2)
            {
                tnext = vsubq_f32(vsubq_f32(x2, x1), vsubq_f32(x1, x0));
            }
            else
            {
                tnext = vsubq_f32(x2, x0);
            }

            if(!x) {
                // First block: nothing to output yet. Lane 3 of tcurr is the
                // only lane read from it on the next iteration (as column -1),
                // so the left border value is planted there.
                tcurr = tnext;
                if (borderType == BORDER_MODE_CONSTANT)
                {
                    // Vertically filtered constant column, identical to the
                    // value the scalar path uses (was the raw borderValue).
                    tcurr = vsetq_lane_f32(constBorderColumn, tcurr, 3);
                }
                else if (borderType == BORDER_MODE_REFLECT101)
                {
                    tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 1),tcurr, 3);
                }
                else // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
                {
                    tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 0),tcurr, 3);
                }
                continue;
            }

            internal::prefetch(trow0 + x);
            internal::prefetch(trow1 + x);

            // Left / centre / right neighbours for columns [x-4, x-1].
            t0 = vextq_f32(tprev, tcurr, 3);
            t1 = tcurr;
            t2 = vextq_f32(tcurr, tnext, 1);

            // Horizontal kernel.
            if(!dx)
            {
                t0 = vaddq_f32(t0, vaddq_f32(vaddq_f32(t1, t1), t2));
            }
            else if(dx == 2)
            {
                t0 = vsubq_f32(vsubq_f32(t2, t1), vsubq_f32(t1, t0));
            }
            else
            {
                t0 = vsubq_f32(t2, t0);
            }

            vst1q_f32(trowCur + x - 4, t0);
        }

        // x is now the first column that has no result yet. If the NEON loop
        // went through x == width, the last column was computed from data
        // beyond the row end, so step back and redo it with a proper border.
        x -= 4;
        if(x == size.width){
            x--;
        }

        // Scalar tail: prevx / rowx / nextx are the vertically filtered
        // values of columns x-1 / x / x+1.
        f32 prevx = 0, rowx = 0, nextx = 0;
        if (x > 0)
            prevx = sobelVerticalTapF32(srow0, srow1, srow2, x-1, dy);
        else if (borderType == BORDER_MODE_REFLECT101)
            prevx = sobelVerticalTapF32(srow0, srow1, srow2, 1, dy);
        else if (borderType == BORDER_MODE_CONSTANT)
            prevx = constBorderColumn;
        else // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
            prevx = sobelVerticalTapF32(srow0, srow1, srow2, 0, dy);
        rowx = sobelVerticalTapF32(srow0, srow1, srow2, x, dy);

        for( ; x < size.width; x++ )
        {
            if (x+1 != size.width)
                nextx = sobelVerticalTapF32(srow0, srow1, srow2, x+1, dy);
            else if (borderType == BORDER_MODE_CONSTANT)       // right border
                nextx = constBorderColumn;
            else if (borderType == BORDER_MODE_REFLECT101)
                nextx = sobelVerticalTapF32(srow0, srow1, srow2, x-1, dy);
            else // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
                nextx = sobelVerticalTapF32(srow0, srow1, srow2, x, dy);

            // Horizontal kernel.
            f32 res;
            if(dx==1) {
                res = nextx - prevx;
            } else if(!dx) {
                res = prevx + 2*rowx + nextx;
            } else {
                res = prevx - 2*rowx + nextx;
            }
            trowCur[x] = res;

            prevx = rowx;
            rowx = nextx;
        }

        // Emit the previous row's result now that row y has been computed.
        if(y>0) {
            for(size_t x1 = 0; x1 < size.width; x1++ )
                drow[x1] = trowPrev[x1];
        }
        // Last source row: flush its own result as well.
        if(y == size.height-1) {
            for(size_t x1 = 0; x1 < size.width; x1++ )
                drow1[x1] = trowCur[x1];
        }
    }
#else
    (void)srcBase;
    (void)srcStride;
    (void)dstBase;
    (void)dstStride;
    (void)borderValue;
#endif
}
}