Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/intrinsics.hpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#ifndef CAROTENE_INTRINSICS_HPP
41
#define CAROTENE_INTRINSICS_HPP
42
43
#include <carotene/definitions.hpp>
44
45
#include <arm_neon.h>
46
47
namespace CAROTENE_NS { namespace internal {
48
49
/////////////// Custom NEON intrinsics ///////////////////
50
51
// calculate reciprocal value
52
53
// Approximates 1/val independently for each of the four float lanes.
// The hardware estimate from vrecpeq_f32 is refined with two Newton-Raphson
// steps; vrecpsq_f32(val, x) supplies the per-step correction factor
// (2 - val * x), which is then multiplied back into the current estimate.
inline float32x4_t vrecpq_f32(float32x4_t val)
{
    float32x4_t approx = vrecpeq_f32(val);
    for (int step = 0; step < 2; ++step)
    {
        approx = vmulq_f32(vrecpsq_f32(val, approx), approx);
    }
    return approx;
}
60
61
// Two-lane counterpart of the reciprocal helper: approximates 1/val per lane.
// Begins with the vrecpe_f32 estimate and applies two Newton-Raphson
// refinements, each using vrecps_f32(val, x) as the correction factor.
inline float32x2_t vrecp_f32(float32x2_t val)
{
    const float32x2_t estimate = vrecpe_f32(val);
    const float32x2_t refinedOnce = vmul_f32(vrecps_f32(val, estimate), estimate);
    const float32x2_t refinedTwice = vmul_f32(vrecps_f32(val, refinedOnce), refinedOnce);
    return refinedTwice;
}
68
69
// calculate reciprocal square root and square root values
70
71
// Approximates 1/sqrt(val) independently for each of the four float lanes.
// The vrsqrteq_f32 estimate is improved by two Newton-Raphson steps; for an
// estimate x, vrsqrtsq_f32(x * x, val) yields the correction factor
// (3 - x * x * val) / 2 that is multiplied back into x.
inline float32x4_t vrsqrtq_f32(float32x4_t val)
{
    float32x4_t approx = vrsqrteq_f32(val);
    for (int step = 0; step < 2; ++step)
    {
        const float32x4_t squared = vmulq_f32(approx, approx);
        approx = vmulq_f32(vrsqrtsq_f32(squared, val), approx);
    }
    return approx;
}
78
79
// Two-lane counterpart of the reciprocal square root helper: approximates
// 1/sqrt(val) per lane from the vrsqrte_f32 estimate followed by two
// Newton-Raphson refinements driven by vrsqrts_f32.
inline float32x2_t vrsqrt_f32(float32x2_t val)
{
    const float32x2_t estimate = vrsqrte_f32(val);

    // First refinement step.
    const float32x2_t estimateSq = vmul_f32(estimate, estimate);
    const float32x2_t refinedOnce = vmul_f32(vrsqrts_f32(estimateSq, val), estimate);

    // Second refinement step.
    const float32x2_t refinedOnceSq = vmul_f32(refinedOnce, refinedOnce);
    const float32x2_t refinedTwice = vmul_f32(vrsqrts_f32(refinedOnceSq, val), refinedOnce);

    return refinedTwice;
}
86
87
// Approximates sqrt(val) for each of the four float lanes as the reciprocal
// of the reciprocal square root, i.e. 1 / (1 / sqrt(val)), by chaining
// vrsqrtq_f32 and vrecpq_f32.
inline float32x4_t vsqrtq_f32(float32x4_t val)
{
    const float32x4_t invRoot = vrsqrtq_f32(val);
    return vrecpq_f32(invRoot);
}
91
92
// Two-lane square root approximation: takes the reciprocal (vrecp_f32) of
// the reciprocal square root (vrsqrt_f32) of val.
inline float32x2_t vsqrt_f32(float32x2_t val)
{
    const float32x2_t invRoot = vrsqrt_f32(val);
    return vrecp_f32(invRoot);
}
96
97
// table lookup with the table in a 128-bit register
98
99
// Byte table lookup with a 16-entry table held in one 128-bit register.
//   a - the 16-byte lookup table
//   b - eight byte indices into the table
// Returns the eight table bytes selected by b.
inline uint8x8_t vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
#ifdef __aarch64__
    // AArch64 supports this natively
    return ::vqtbl1_u8(a, b);
#else
    // Without the native 128-bit form, split the table into its low and high
    // 64-bit halves and use the two-register lookup. The halves are extracted
    // with lane-exact intrinsics instead of reading an inactive union member,
    // which is undefined behaviour in C++ and depends on how the vector is
    // laid out in memory.
    uint8x8x2_t table;
    table.val[0] = vget_low_u8(a);
    table.val[1] = vget_high_u8(a);
    return vtbl2_u8(table, b);
#endif
}
109
110
} }
111
112
#endif
113
114