// Path: blob/master/3rdparty/carotene/src/laplacian.cpp
// 16337 views
/*1* By downloading, copying, installing or using the software you agree to this license.2* If you do not agree to this license, do not download, install,3* copy or use the software.4*5*6* License Agreement7* For Open Source Computer Vision Library8* (3-clause BSD License)9*10* Copyright (C) 2015, NVIDIA Corporation, all rights reserved.11* Third party copyrights are property of their respective owners.12*13* Redistribution and use in source and binary forms, with or without modification,14* are permitted provided that the following conditions are met:15*16* * Redistributions of source code must retain the above copyright notice,17* this list of conditions and the following disclaimer.18*19* * Redistributions in binary form must reproduce the above copyright notice,20* this list of conditions and the following disclaimer in the documentation21* and/or other materials provided with the distribution.22*23* * Neither the names of the copyright holders nor the names of the contributors24* may be used to endorse or promote products derived from this software25* without specific prior written permission.26*27* This software is provided by the copyright holders and contributors "as is" and28* any express or implied warranties, including, but not limited to, the implied29* warranties of merchantability and fitness for a particular purpose are disclaimed.30* In no event shall copyright holders or contributors be liable for any direct,31* indirect, incidental, special, exemplary, or consequential damages32* (including, but not limited to, procurement of substitute goods or services;33* loss of use, data, or profits; or business interruption) however caused34* and on any theory of liability, whether in contract, strict liability,35* or tort (including negligence or otherwise) arising in any way out of36* the use of this software, even if advised of the possibility of such damage.37*/3839#include "common.hpp"40#include "saturate_cast.hpp"4142#include <vector>4344namespace 
CAROTENE_NS {4546bool isLaplacian3x3Supported(const Size2D &size, BORDER_MODE border)47{48return isSupportedConfiguration() && size.width >= 8 &&49(border == BORDER_MODE_CONSTANT ||50border == BORDER_MODE_REPLICATE);51}5253void Laplacian3x3(const Size2D &size,54const u8 * srcBase, ptrdiff_t srcStride,55u8 * dstBase, ptrdiff_t dstStride,56BORDER_MODE border, u8 borderValue)57{58internal::assertSupportedConfiguration(isLaplacian3x3Supported(size, border));59#ifdef CAROTENE_NEON60const uint16x8_t v_border_x3 = vdupq_n_u16(borderValue * 3);61const uint16x8_t v_zero = vdupq_n_u16(0);62const uint8x8_t v_border = vdup_n_u8(borderValue);6364uint8x8_t vsub;65uint16x8_t tprev = v_zero, tcurr = v_zero, tnext = v_zero;66uint16x8_t t0 = v_zero, t1 = v_zero, t2 = v_zero;6768ptrdiff_t width = (ptrdiff_t)size.width, height = (ptrdiff_t)size.height;6970for (ptrdiff_t y = 0; y < height; ++y)71{72const u8 * srow0 = y == 0 && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::max<ptrdiff_t>(y - 1, 0));73const u8 * srow1 = internal::getRowPtr(srcBase, srcStride, y);74const u8 * srow2 = y + 1 == height && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::min(y + 1, height - 1));75u8 * drow = internal::getRowPtr(dstBase, dstStride, y);7677s16 prevx = 0, currx = 0, nextx = 0;78ptrdiff_t x = 0;79const ptrdiff_t bwidth = y + 2 < height ? width : (width - 8);8081// perform vertical convolution82for ( ; x <= bwidth; x += 8)83{84internal::prefetch(srow0 + x);85internal::prefetch(srow1 + x);86internal::prefetch(srow2 + x);8788uint8x8_t x0 = !srow0 ? v_border : vld1_u8(srow0 + x);89uint8x8_t x1 = vld1_u8(srow1 + x);90uint8x8_t x2 = !srow2 ? v_border : vld1_u8(srow2 + x);9192// calculate values for plain CPU part below if needed93if (x + 8 >= bwidth)94{95ptrdiff_t x3 = x == width ? width - 1 : x;96ptrdiff_t x4 = border == BORDER_MODE_CONSTANT ? 
x3 - 1 : std::max<ptrdiff_t>(x3 - 1, 0);9798if (border == BORDER_MODE_CONSTANT && x4 < 0)99prevx = borderValue;100else101prevx = (srow2 ? srow2[x4] : borderValue) + srow1[x4] + (srow0 ? srow0[x4] : borderValue);102103currx = (srow2 ? srow2[x3] : borderValue) + srow1[x3] + (srow0 ? srow0[x3] : borderValue);104}105106// make shift107if (x)108{109tprev = tcurr;110tcurr = tnext;111}112113// and calculate next value114tnext = vaddw_u8(vaddl_u8(x0, x1), x2);115116// make extrapolation for the first elements117if (!x)118{119// make border120if (border == BORDER_MODE_CONSTANT)121tcurr = v_border_x3;122else if (border == BORDER_MODE_REPLICATE)123tcurr = vdupq_n_u16(vgetq_lane_u16(tnext, 0));124125vsub = x1;126127continue;128}129130// combine 3 "shifted" vectors131t0 = vextq_u16(tprev, tcurr, 7);132t1 = tcurr;133t2 = vextq_u16(tcurr, tnext, 1);134135// and add them136t0 = vqaddq_u16(t0, vqaddq_u16(t1, t2));137138int16x8_t tt0 = vsubq_s16(vreinterpretq_s16_u16(t0),139vreinterpretq_s16_u16(vaddw_u8(vshll_n_u8(vsub, 3), vsub)));140uint8x8_t it0 = vqmovun_s16(tt0);141vst1_u8(drow + x - 8, it0);142143vsub = x1;144}145146x -= 8;147if (x == width)148--x;149150for ( ; x < width; ++x)151{152// make extrapolation for the last elements153if (x + 1 >= width)154{155if (border == BORDER_MODE_CONSTANT)156nextx = borderValue * 3;157else if (border == BORDER_MODE_REPLICATE)158nextx = srow2[x] + srow1[x] + srow0[x];159}160else161{162nextx = (srow2 ? srow2[x + 1] : borderValue) +163srow1[x + 1] +164(srow0 ? 
srow0[x + 1] : borderValue);165}166167s32 val = (prevx + currx + nextx) - 9 * srow1[x];168drow[x] = internal::saturate_cast<u8>((s32)val);169170// make shift171prevx = currx;172currx = nextx;173}174}175#else176(void)size;177(void)srcBase;178(void)srcStride;179(void)dstBase;180(void)dstStride;181(void)border;182(void)borderValue;183#endif184}185186bool isLaplacianOpenCVSupported(const Size2D &size, BORDER_MODE border)187{188return isSupportedConfiguration() &&189size.width >= 8 && size.height >= 1 &&190(border == BORDER_MODE_CONSTANT ||191border == BORDER_MODE_REFLECT ||192border == BORDER_MODE_REFLECT101 ||193border == BORDER_MODE_REPLICATE);194}195196void Laplacian1OpenCV(const Size2D &size,197const u8 * srcBase, ptrdiff_t srcStride,198s16 * dstBase, ptrdiff_t dstStride,199BORDER_MODE border, u8 borderValue)200{201internal::assertSupportedConfiguration(isLaplacianOpenCVSupported(size, border));202#ifdef CAROTENE_NEON203ptrdiff_t rows = size.height, cols = size.width;204205std::vector<u8> _tmp;206u8 *tmp = 0;207if (border == BORDER_MODE_CONSTANT)208{209_tmp.assign(cols + 4,borderValue);210tmp = &_tmp[2];211}212213for( ptrdiff_t y = 0; y < rows; y++ )214{215const u8* v0 = 0;216const u8* v1 = internal::getRowPtr(srcBase, srcStride, y);217const u8* v2 = 0;218// make border219if (border == BORDER_MODE_REFLECT101) {220v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : y+1);221v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);222} else if (border == BORDER_MODE_CONSTANT) {223v0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;224v2 = y < rows-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;225} else {226v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);227v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? 
rows-1 : 0);228}229s16* drow = internal::getRowPtr(dstBase, dstStride, y);230231int16x8_t tcurr = vmovq_n_s16(0x0);232int16x8_t tnext = vmovq_n_s16(0x0);233int16x8_t t0, t2;234uint8x8_t xx0 = vmov_n_u8(0x0);235uint8x8_t xx1 = vmov_n_u8(0x0);236uint8x8_t xx2 = vmov_n_u8(0x0);237ptrdiff_t x = 0;238const ptrdiff_t bcols = y + 2 < rows ? cols : (cols - 8);239for( ; x <= bcols; x += 8 )240{241internal::prefetch(v0 + x);242internal::prefetch(v1 + x);243internal::prefetch(v2 + x);244245uint8x8_t x0 = vld1_u8(v0 + x);246uint8x8_t x1 = vld1_u8(v1 + x);247uint8x8_t x2 = vld1_u8(v2 + x);248249if(x) {250xx0 = xx1;251xx1 = xx2;252} else {253xx1 = x1;254// make border255if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)256{257xx1 = vset_lane_u8(vget_lane_u8(x1, 0),x1, 7);258}259else if (border == BORDER_MODE_CONSTANT)260{261xx1 = vset_lane_u8(borderValue, x1, 7);262}263else if (border == BORDER_MODE_REFLECT101)264{265xx1 = vset_lane_u8(vget_lane_u8(x1, 1),x1, 7);266}267}268xx2 = x1;269270if(x) {271tcurr = tnext;272}273tnext = vsubq_s16(vreinterpretq_s16_u16(vaddl_u8(x0, x2)),274vreinterpretq_s16_u16(vshll_n_u8(x1, 2)));275276if(!x) {277tcurr = tnext;278continue;279}280t0 = vreinterpretq_s16_u16(vmovl_u8(vext_u8(xx0, xx1, 7)));281t2 = vreinterpretq_s16_u16(vmovl_u8(vext_u8(xx1, xx2, 1)));282t0 = vaddq_s16(vqaddq_s16(t0, t2), tcurr);283284vst1q_s16(drow + x - 8, t0);285}286287x -= 8;288if(x == cols){289x--;290}291292for( ; x < cols; x++ )293{294s16 nextx;295s16 prevx;296// make border297if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)298{299prevx = x == 0 ? v1[0] : v1[x-1];300nextx = x == cols-1 ? v1[x] : v1[x+1];301}302else if (border == BORDER_MODE_REFLECT101)303{304prevx = x == 0 ? v1[1] : v1[x-1];305nextx = x == cols-1 ? v1[x-1] : v1[x+1];306}307else //if (border == BORDER_MODE_CONSTANT)308{309prevx = x == 0 ? borderValue : v1[x-1];310nextx = x == cols-1 ? 
borderValue : v1[x+1];311}312*(drow+x) = prevx + nextx - 4*v1[x] + v0[x] + v2[x];313}314}315#else316(void)size;317(void)srcBase;318(void)srcStride;319(void)dstBase;320(void)dstStride;321(void)border;322(void)borderValue;323#endif324}325326void Laplacian3OpenCV(const Size2D &size,327const u8 * srcBase, ptrdiff_t srcStride,328s16 * dstBase, ptrdiff_t dstStride,329BORDER_MODE border, u8 borderValue)330{331internal::assertSupportedConfiguration(isLaplacianOpenCVSupported(size, border));332#ifdef CAROTENE_NEON333ptrdiff_t rows = size.height, cols = size.width;334335std::vector<u8> _tmp;336u8 *tmp = 0;337if (border == BORDER_MODE_CONSTANT)338{339_tmp.assign(cols + 4,borderValue);340tmp = &_tmp[2];341}342343for( ptrdiff_t y = 0; y < rows; y++ )344{345const u8* v0 = 0;346const u8* v1 = internal::getRowPtr(srcBase, srcStride, y);347const u8* v2 = 0;348// make border349if (border == BORDER_MODE_REFLECT101) {350v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : y+1);351v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);352} else if (border == BORDER_MODE_CONSTANT) {353v0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;354v2 = y < rows-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;355} else {356v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);357v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);358}359s16* drow = internal::getRowPtr(dstBase, dstStride, y);360361int16x8_t tprev = vmovq_n_s16(0x0);362int16x8_t tcurr = vmovq_n_s16(0x0);363int16x8_t tnext = vmovq_n_s16(0x0);364int16x8_t tc = vmovq_n_s16(0x0);365int16x8_t t0, t2, tcnext;366ptrdiff_t x = 0;367const ptrdiff_t bcols = y + 2 < rows ? 
cols : (cols - 8);368for( ; x <= bcols; x += 8 )369{370internal::prefetch(v0 + x);371internal::prefetch(v1 + x);372internal::prefetch(v2 + x);373374uint8x8_t x0 = vld1_u8(v0 + x);375uint8x8_t x1 = vld1_u8(v1 + x);376uint8x8_t x2 = vld1_u8(v2 + x);377tcnext = vreinterpretq_s16_u16(vshll_n_u8(x1, 2));378379if(x) {380tprev = tcurr;381tcurr = tnext;382}383tnext = vreinterpretq_s16_u16(vaddl_u8(x0, x2));384385if(!x) {386tcurr = tnext;387tc = tcnext;388389// make border390if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)391{392tcurr = vsetq_lane_s16(vgetq_lane_s16(tcurr, 0),tcurr, 7);393}394else if (border == BORDER_MODE_CONSTANT)395{396tcurr = vsetq_lane_s16(borderValue, tcurr, 7);397}398else if (border == BORDER_MODE_REFLECT101)399{400tcurr = vsetq_lane_s16(vgetq_lane_s16(tcurr, 1),tcurr, 7);401}402continue;403}404405t0 = vextq_s16(tprev, tcurr, 7);406t2 = vextq_s16(tcurr, tnext, 1);407408t0 = vsubq_s16(vqaddq_s16(t0, t2), tc);409tc = tcnext;410411t0 = vshlq_n_s16(t0, 1);412vst1q_s16(drow + x - 8, t0);413}414x -= 8;415if(x == cols){416x--;417}418419for( ; x < cols; x++ )420{421s16 nextx, nextx2;422s16 prevx, prevx2;423// make border424if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)425{426prevx = x == 0 ? v0[0] : v0[x-1];427prevx2 = x == 0 ? v2[0] : v2[x-1];428nextx = x == cols-1 ? v0[x] : v0[x+1];429nextx2 = x == cols-1 ? v2[x] : v2[x+1];430}431else if (border == BORDER_MODE_REFLECT101)432{433prevx = x == 0 ? v0[1] : v0[x-1];434prevx2 = x == 0 ? v2[1] : v2[x-1];435nextx = x == cols-1 ? v0[x-1] : v0[x+1];436nextx2 = x == cols-1 ? v2[x-1] : v2[x+1];437}438else //if (border == BORDER_MODE_CONSTANT)439{440prevx = x == 0 ? borderValue : v0[x-1];441prevx2 = x == 0 ? borderValue : v2[x-1];442nextx = x == cols-1 ? borderValue : v0[x+1];443nextx2 = x == cols-1 ? 
borderValue : v2[x+1];444}445s16 res = prevx + nextx - 4*v1[x] + prevx2 + nextx2;446*(drow+x) = 2*res;447}448}449#else450(void)size;451(void)srcBase;452(void)srcStride;453(void)dstBase;454(void)dstStride;455(void)border;456(void)borderValue;457#endif458}459460void Laplacian5OpenCV(const Size2D &size,461const u8 * srcBase, ptrdiff_t srcStride,462s16 * dstBase, ptrdiff_t dstStride,463BORDER_MODE border, u8 borderValue)464{465internal::assertSupportedConfiguration(isLaplacianOpenCVSupported(size, border));466#ifdef CAROTENE_NEON467ptrdiff_t rows = size.height, cols = size.width;468469std::vector<u8> _tmp;470u8 *tmp = 0;471if (border == BORDER_MODE_CONSTANT)472{473_tmp.assign(cols + 4,borderValue);474tmp = &_tmp[2];475}476477for( ptrdiff_t y = 0; y < rows; y++ )478{479const u8* v0 = 0;480const u8* v1 = 0;481const u8* v2 = internal::getRowPtr(srcBase, srcStride, y);482const u8* v3 = 0;483const u8* v4 = 0;484// make border485if (border == BORDER_MODE_REPLICATE) {486v0 = internal::getRowPtr(srcBase, srcStride, y > 1 ? y-2 : 0);487v1 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);488v3 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);489v4 = internal::getRowPtr(srcBase, srcStride, y < rows-2 ? y+2 : rows > 0 ? rows-1 : 0);490} else if (border == BORDER_MODE_REFLECT) {491v0 = internal::getRowPtr(srcBase, srcStride, y > 1 ? y-2 : rows > 1 ? 1-y : 0);492v1 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);493v3 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);494v4 = internal::getRowPtr(srcBase, srcStride, y < rows-2 ? y+2 : rows > 1 ? 2*rows-(y+3) : 0);495} else if (border == BORDER_MODE_REFLECT101) {496v0 = internal::getRowPtr(srcBase, srcStride, y > 1 ? y-2 : rows > 2-y ? 2-y : 0); ///check497v1 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : rows > 1 ? 1 : 0);498v3 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 1 ? 
rows-2 : 0);499v4 = internal::getRowPtr(srcBase, srcStride, y < rows-2 ? y+2 : rows > 2 ? 2*rows-(y+4) : 0);///bad if rows=2 y=1 rows - 4 + (2,1)500} else if (border == BORDER_MODE_CONSTANT) {501v0 = y > 1 ? internal::getRowPtr(srcBase, srcStride, y-2) : tmp;502v1 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;503v3 = y < rows-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;504v4 = y < rows-2 ? internal::getRowPtr(srcBase, srcStride, y+2) : tmp;505}506s16* drow = internal::getRowPtr(dstBase, dstStride, y);507508int16x8_t tnext, tc, t0;509int16x8_t tnext2, tnext3;510int16x8_t tnext1Old, tnext2Old, tnext3Old;511int16x8_t tnext4OldOldOld, tnext5OldOldOld;512513int16x8_t tcurr1 = vmovq_n_s16(0x0);514int16x8_t tnext1 = vmovq_n_s16(0x0);515int16x8_t tprev1 = vmovq_n_s16(0x0);516int16x8_t tpprev1 = vmovq_n_s16(0x0);517int16x8_t tppprev1 = vmovq_n_s16(0x0);518519int16x8_t tnext4Old = vmovq_n_s16(0x0);520int16x8_t tnext5Old = vmovq_n_s16(0x0);521int16x8_t tnext1OldOld = vmovq_n_s16(0x0);522int16x8_t tnext2OldOld = vmovq_n_s16(0x0);523int16x8_t tnext3OldOld = vmovq_n_s16(0x0);524int16x8_t tnext4OldOld = vmovq_n_s16(0x0);525int16x8_t tnext5OldOld = vmovq_n_s16(0x0);526527// do vertical convolution528ptrdiff_t x = 0;529const ptrdiff_t bcols = y + 3 < rows ? 
cols : (cols - 8);530for( ; x <= bcols; x += 8 )531{532internal::prefetch(v0 + x);533internal::prefetch(v1 + x);534internal::prefetch(v2 + x);535internal::prefetch(v3 + x);536internal::prefetch(v4 + x);537538uint8x8_t x0 = vld1_u8(v0 + x);539uint8x8_t x1 = vld1_u8(v1 + x);540uint8x8_t x2 = vld1_u8(v2 + x);541uint8x8_t x3 = vld1_u8(v3 + x);542uint8x8_t x4 = vld1_u8(v4 + x);543if(x) {544tcurr1 = tnext1;545}546547tnext4OldOldOld = tnext4Old;548tnext5OldOldOld = tnext5Old;549tnext1Old = tnext1OldOld;550tnext2Old = tnext2OldOld;551tnext3Old = tnext3OldOld;552tnext4Old = tnext4OldOld;553tnext5Old = tnext5OldOld;554555tnext3 = vreinterpretq_s16_u16(vaddq_u16(vaddl_u8(x3, x2),vaddl_u8(x2, x1)));556tnext3 = vshlq_n_s16(tnext3, 1);557558tc = vreinterpretq_s16_u16(vsubl_u8(x4, x2));559tnext = vreinterpretq_s16_u16(vsubl_u8(x2, x0));560tnext2 = vsubq_s16(tc, tnext);561562tnext1 = vaddq_s16(tnext3, tnext2);563// tnext1 = x0 + 2*x1 + 2*x2 + 2*x3 + x4564565tnext2 = vshlq_n_s16(tnext2, 1);566// tnext2 = 2*x4 - 4*x2 + 2*x0567568tnext3 = vsubq_s16(tnext2, vshlq_n_s16(tnext3, 1));569// tnext3 = 2*x0 - 4*x1 - 12*x2 - 4*x3 + 2*x4570571tnext1OldOld = tnext1;572tnext2OldOld = tnext2;573tnext3OldOld = tnext3;574tnext4OldOld = tnext2;575tnext5OldOld = tnext1;576577if(x) {578tnext1 = vextq_s16(tnext1Old, tnext1, 2);579tcurr1 = vextq_s16(tnext2Old, tnext2, 1);580tprev1 = tnext3Old;581582if(x!=8) {583tpprev1 = vextq_s16(tnext4OldOldOld, tnext4Old, 7);584tppprev1 = vextq_s16(tnext5OldOldOld, tnext5Old, 6);585}586}587588if(!x) {589// make border590if (border == BORDER_MODE_REPLICATE) {591tpprev1 = vextq_s16(tnext2, tnext2, 7);592tpprev1 = vsetq_lane_s16(vgetq_lane_s16(tpprev1, 1),tpprev1, 0);593594tprev1 = vextq_s16(tnext1, tnext1, 6);595tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 2),tprev1, 0);596tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 2),tprev1, 1);597} else if (border == BORDER_MODE_REFLECT) {598tpprev1 = vextq_s16(tnext2, tnext2, 7);599tpprev1 = 
vsetq_lane_s16(vgetq_lane_s16(tpprev1, 1),tpprev1, 0);600601tprev1 = vextq_s16(tnext1, tnext1, 6);602tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 3),tprev1, 0);603tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 2),tprev1, 1);604} else if (border == BORDER_MODE_REFLECT101) {605tpprev1 = vextq_s16(tnext2, tnext2, 7);606tpprev1 = vsetq_lane_s16(vgetq_lane_s16(tpprev1, 2),tpprev1, 0);607608tprev1 = vextq_s16(tnext1, tnext1, 6);609tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 3),tprev1, 1);610tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 4),tprev1, 0);611} else if (border == BORDER_MODE_CONSTANT) {612tpprev1 = vextq_s16(tnext2, tnext2, 7);613tpprev1 = vsetq_lane_s16(borderValue, tpprev1, 0);614615tprev1 = vextq_s16(tnext1, tnext1, 6);616tprev1 = vsetq_lane_s16(borderValue, tprev1, 0);617tprev1 = vsetq_lane_s16(borderValue, tprev1, 1);618}619tppprev1 = tprev1;620continue;621}622623t0 = vaddq_s16(vaddq_s16(vqaddq_s16(tcurr1, tprev1), vqaddq_s16(tpprev1, tppprev1)), tnext1);624t0 = vaddq_s16(t0, t0);625vst1q_s16(drow + x - 8, t0);626}627x -= 8;628if(x >= cols - 1)629x = cols-2;630631s16 pprevx = 0;632s16 prevx = 0;633s16 nextx = 0;634s16 nnextx = 0;635636for( ; x < cols; x++ )637{638if (x == 0) {639// make border640if (border == BORDER_MODE_REPLICATE) {641pprevx = v0[0] + 2*v1[0] + 2*v2[0] + 2*v3[0] + v4[0];642prevx = 2*v0[0] - 4*v2[0] + 2*v4[0];643} else if (border == BORDER_MODE_REFLECT) {644pprevx = v0[1] + 2*v1[1] + 2*v2[1] + 2*v3[1] + v4[1];645prevx = 2*v0[0] - 4*v2[0] + 2*v4[0];646} else if (border == BORDER_MODE_REFLECT101) {647pprevx = v0[2] + 2*v1[2] + 2*v2[2] + 2*v3[2] + v4[2];648prevx = 2*v0[1] - 4*v2[1] + 2*v4[1];649} else if (border == BORDER_MODE_CONSTANT) {650pprevx = 8 * borderValue;651prevx = 0;652}653} else if (x == 1) {654// make border655if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT) {656pprevx = v0[0] + 2*v1[0] + 2*v2[0] + 2*v3[0] + v4[0];657} else if (border == BORDER_MODE_REFLECT101) {658pprevx = v0[1] + 2*v1[1] + 2*v2[1] + 
2*v3[1] + v4[1];659} else if (border == BORDER_MODE_CONSTANT) {660pprevx = 8 * borderValue;661}662prevx = 2*v0[0] - 4*v2[0] + 2*v4[0];663} else {664pprevx = v0[x-2] + 2*v1[x-2] + 2*v2[x-2] + 2*v3[x-2] + v4[x-2];665prevx = 2*v0[x-1] - 4*v2[x-1] + 2*v4[x-1];666}667s16 currx = 2*v0[x] - 4*v1[x] - 12*v2[x] - 4*v3[x] + 2*v4[x];668if (x == cols-1) {669// make border670if (border == BORDER_MODE_REPLICATE) {671nextx = 2*v0[x] - 4*v2[x] + 2*v4[x];672nnextx = v0[x] + 2*v1[x] + 2*v2[x] + 2*v3[x] + v4[x];673} else if (border == BORDER_MODE_REFLECT) {674nextx = 2*v0[x] - 4*v2[x] + 2*v4[x];675nnextx = v0[x-1] + 2*v1[x-1] + 2*v2[x-1] + 2*v3[x-1] + v4[x-1];676} else if (border == BORDER_MODE_REFLECT101) {677nextx = 2*v0[x-1] - 4*v2[x-1] + 2*v4[x-1];678nnextx = v0[x-2] + 2*v1[x-2] + 2*v2[x-2] + 2*v3[x-2] + v4[x-2];679} else if (border == BORDER_MODE_CONSTANT) {680nextx = 0;681nnextx = 8 * borderValue;682}683} else if (x == cols-2) {684// make border685if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT) {686nnextx = v0[x+1] + 2*v1[x+1] + 2*v2[x+1] + 2*v3[x+1] + v4[x+1];687} else if (border == BORDER_MODE_REFLECT101) {688nnextx = v0[x] + 2*v1[x] + 2*v2[x] + 2*v3[x] + v4[x];689} else if (border == BORDER_MODE_CONSTANT) {690nnextx = 8 * borderValue;691}692nextx = 2*v0[x+1] - 4*v2[x+1] + 2*v4[x+1];693} else {694nextx = 2*v0[x+1] - 4*v2[x+1] + 2*v4[x+1];695nnextx = v0[x+2] + 2*v1[x+2] + 2*v2[x+2] + 2*v3[x+2] + v4[x+2];696}697s16 res = pprevx + prevx + currx + nextx + nnextx;698*(drow+x) = 2*res;699}700}701#else702(void)size;703(void)srcBase;704(void)srcStride;705(void)dstBase;706(void)dstStride;707(void)border;708(void)borderValue;709#endif710}711712} // namespace CAROTENE_NS713714715