Path: blob/master/3rdparty/carotene/src/intrinsics.hpp
16337 views
/*1* By downloading, copying, installing or using the software you agree to this license.2* If you do not agree to this license, do not download, install,3* copy or use the software.4*5*6* License Agreement7* For Open Source Computer Vision Library8* (3-clause BSD License)9*10* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.11* Third party copyrights are property of their respective owners.12*13* Redistribution and use in source and binary forms, with or without modification,14* are permitted provided that the following conditions are met:15*16* * Redistributions of source code must retain the above copyright notice,17* this list of conditions and the following disclaimer.18*19* * Redistributions in binary form must reproduce the above copyright notice,20* this list of conditions and the following disclaimer in the documentation21* and/or other materials provided with the distribution.22*23* * Neither the names of the copyright holders nor the names of the contributors24* may be used to endorse or promote products derived from this software25* without specific prior written permission.26*27* This software is provided by the copyright holders and contributors "as is" and28* any express or implied warranties, including, but not limited to, the implied29* warranties of merchantability and fitness for a particular purpose are disclaimed.30* In no event shall copyright holders or contributors be liable for any direct,31* indirect, incidental, special, exemplary, or consequential damages32* (including, but not limited to, procurement of substitute goods or services;33* loss of use, data, or profits; or business interruption) however caused34* and on any theory of liability, whether in contract, strict liability,35* or tort (including negligence or otherwise) arising in any way out of36* the use of this software, even if advised of the possibility of such damage.37*/3839#ifndef CAROTENE_INTRINSICS_HPP40#define CAROTENE_INTRINSICS_HPP4142#include <carotene/definitions.hpp>4344#include <arm_neon.h>4546namespace CAROTENE_NS { namespace internal {4748/////////////// Custom NEON intrinsics ///////////////////4950// calculate reciprocal value5152inline float32x4_t vrecpq_f32(float32x4_t val)53{54float32x4_t reciprocal = vrecpeq_f32(val);55reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);56reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);57return reciprocal;58}5960inline float32x2_t vrecp_f32(float32x2_t val)61{62float32x2_t reciprocal = vrecpe_f32(val);63reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);64reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);65return reciprocal;66}6768// caclulate sqrt value6970inline float32x4_t vrsqrtq_f32(float32x4_t val)71{72float32x4_t e = vrsqrteq_f32(val);73e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);74e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);75return e;76}7778inline float32x2_t vrsqrt_f32(float32x2_t val)79{80float32x2_t e = vrsqrte_f32(val);81e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);82e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);83return e;84}8586inline float32x4_t vsqrtq_f32(float32x4_t val)87{88return vrecpq_f32(vrsqrtq_f32(val));89}9091inline float32x2_t vsqrt_f32(float32x2_t val)92{93return vrecp_f32(vrsqrt_f32(val));94}9596// table lookup with the table in a 128-bit register9798inline uint8x8_t vqtbl1_u8 (uint8x16_t a, uint8x8_t b)99{100#ifdef __aarch64__101// AArch64 supports this natively102return ::vqtbl1_u8(a, b);103#else104union { uint8x16_t v; uint8x8x2_t w; } u = { a };105return vtbl2_u8(u.w, b);106#endif107}108109} }110111#endif112113114