Path: blob/master/3rdparty/carotene/src/magnitude.cpp
16337 views
/*1* By downloading, copying, installing or using the software you agree to this license.2* If you do not agree to this license, do not download, install,3* copy or use the software.4*5*6* License Agreement7* For Open Source Computer Vision Library8* (3-clause BSD License)9*10* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.11* Third party copyrights are property of their respective owners.12*13* Redistribution and use in source and binary forms, with or without modification,14* are permitted provided that the following conditions are met:15*16* * Redistributions of source code must retain the above copyright notice,17* this list of conditions and the following disclaimer.18*19* * Redistributions in binary form must reproduce the above copyright notice,20* this list of conditions and the following disclaimer in the documentation21* and/or other materials provided with the distribution.22*23* * Neither the names of the copyright holders nor the names of the contributors24* may be used to endorse or promote products derived from this software25* without specific prior written permission.26*27* This software is provided by the copyright holders and contributors "as is" and28* any express or implied warranties, including, but not limited to, the implied29* warranties of merchantability and fitness for a particular purpose are disclaimed.30* In no event shall copyright holders or contributors be liable for any direct,31* indirect, incidental, special, exemplary, or consequential damages32* (including, but not limited to, procurement of substitute goods or services;33* loss of use, data, or profits; or business interruption) however caused34* and on any theory of liability, whether in contract, strict liability,35* or tort (including negligence or otherwise) arising in any way out of36* the use of this software, even if advised of the possibility of such damage.37*/3839#include "common.hpp"40#include "vtransform.hpp"4142#include <cmath>4344namespace CAROTENE_NS {4546#ifdef CAROTENE_NEON4748namespace {4950struct Magnitude51{52typedef s16 type;5354void operator() (const int16x8_t & v_src0, const int16x8_t & v_src1,55int16x8_t & v_dst) const56{57int16x4_t v_src0_p = vget_low_s16(v_src0), v_src1_p = vget_low_s16(v_src1);58float32x4_t v_sqr0 = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0_p, v_src0_p)),59vcvtq_f32_s32(vmull_s16(v_src1_p, v_src1_p)));60v_src0_p = vget_high_s16(v_src0);61v_src1_p = vget_high_s16(v_src1);62float32x4_t v_sqr1 = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0_p, v_src0_p)),63vcvtq_f32_s32(vmull_s16(v_src1_p, v_src1_p)));6465int32x4_t v_sqrt0 = vcvtq_s32_f32(internal::vsqrtq_f32(v_sqr0));66int32x4_t v_sqrt1 = vcvtq_s32_f32(internal::vsqrtq_f32(v_sqr1));6768v_dst = vcombine_s16(vqmovn_s32(v_sqrt0), vqmovn_s32(v_sqrt1));69}7071void operator() (const int16x4_t & v_src0, const int16x4_t & v_src1,72int16x4_t & v_dst) const73{74float32x4_t v_tmp = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0, v_src0)),75vcvtq_f32_s32(vmull_s16(v_src1, v_src1)));76int32x4_t v_sqrt = vcvtq_s32_f32(internal::vsqrtq_f32(v_tmp));77v_dst = vqmovn_s32(v_sqrt);78}7980void operator() (const short * src0, const short * src1, short * dst) const81{82f32 src0val = (f32)src0[0], src1val = (f32)src1[0];83dst[0] = internal::saturate_cast<s16>((s32)sqrtf(src0val * src0val + src1val * src1val));84}85};8687struct MagnitudeF3288{89typedef f32 type;9091void operator() (const float32x4_t & v_src0, const float32x4_t & v_src1,92float32x4_t & v_dst) const93{94v_dst = internal::vsqrtq_f32(vaddq_f32(vmulq_f32(v_src0, v_src0), vmulq_f32(v_src1, v_src1)));95}9697void operator() (const float32x2_t & v_src0, const float32x2_t & v_src1,98float32x2_t & v_dst) const99{100v_dst = internal::vsqrt_f32(vadd_f32(vmul_f32(v_src0, v_src0), vmul_f32(v_src1, v_src1)));101}102103void operator() (const f32 * src0, const f32 * src1, f32 * dst) const104{105dst[0] = sqrtf(src0[0] * src0[0] + src1[0] * src1[0]);106}107};108109} // namespace110111#endif112113void magnitude(const Size2D &size,114const s16 * src0Base, ptrdiff_t src0Stride,115const s16 * src1Base, ptrdiff_t src1Stride,116s16 * dstBase, ptrdiff_t dstStride)117{118internal::assertSupportedConfiguration();119#ifdef CAROTENE_NEON120internal::vtransform(size,121src0Base, src0Stride,122src1Base, src1Stride,123dstBase, dstStride,124Magnitude());125#else126(void)size;127(void)src0Base;128(void)src0Stride;129(void)src1Base;130(void)src1Stride;131(void)dstBase;132(void)dstStride;133#endif134}135136void magnitude(const Size2D &size,137const f32 * src0Base, ptrdiff_t src0Stride,138const f32 * src1Base, ptrdiff_t src1Stride,139f32 * dstBase, ptrdiff_t dstStride)140{141internal::assertSupportedConfiguration();142#ifdef CAROTENE_NEON143internal::vtransform(size,144src0Base, src0Stride,145src1Base, src1Stride,146dstBase, dstStride,147MagnitudeF32());148#else149(void)size;150(void)src0Base;151(void)src0Stride;152(void)src1Base;153(void)src1Stride;154(void)dstBase;155(void)dstStride;156#endif157}158159} // namespace CAROTENE_NS160161162