Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/magnitude.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
#include "vtransform.hpp"
42
43
#include <cmath>
44
45
namespace CAROTENE_NS {
46
47
#ifdef CAROTENE_NEON
48
49
namespace {
50
51
struct Magnitude
52
{
53
typedef s16 type;
54
55
void operator() (const int16x8_t & v_src0, const int16x8_t & v_src1,
56
int16x8_t & v_dst) const
57
{
58
int16x4_t v_src0_p = vget_low_s16(v_src0), v_src1_p = vget_low_s16(v_src1);
59
float32x4_t v_sqr0 = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0_p, v_src0_p)),
60
vcvtq_f32_s32(vmull_s16(v_src1_p, v_src1_p)));
61
v_src0_p = vget_high_s16(v_src0);
62
v_src1_p = vget_high_s16(v_src1);
63
float32x4_t v_sqr1 = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0_p, v_src0_p)),
64
vcvtq_f32_s32(vmull_s16(v_src1_p, v_src1_p)));
65
66
int32x4_t v_sqrt0 = vcvtq_s32_f32(internal::vsqrtq_f32(v_sqr0));
67
int32x4_t v_sqrt1 = vcvtq_s32_f32(internal::vsqrtq_f32(v_sqr1));
68
69
v_dst = vcombine_s16(vqmovn_s32(v_sqrt0), vqmovn_s32(v_sqrt1));
70
}
71
72
void operator() (const int16x4_t & v_src0, const int16x4_t & v_src1,
73
int16x4_t & v_dst) const
74
{
75
float32x4_t v_tmp = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0, v_src0)),
76
vcvtq_f32_s32(vmull_s16(v_src1, v_src1)));
77
int32x4_t v_sqrt = vcvtq_s32_f32(internal::vsqrtq_f32(v_tmp));
78
v_dst = vqmovn_s32(v_sqrt);
79
}
80
81
void operator() (const short * src0, const short * src1, short * dst) const
82
{
83
f32 src0val = (f32)src0[0], src1val = (f32)src1[0];
84
dst[0] = internal::saturate_cast<s16>((s32)sqrtf(src0val * src0val + src1val * src1val));
85
}
86
};
87
88
struct MagnitudeF32
89
{
90
typedef f32 type;
91
92
void operator() (const float32x4_t & v_src0, const float32x4_t & v_src1,
93
float32x4_t & v_dst) const
94
{
95
v_dst = internal::vsqrtq_f32(vaddq_f32(vmulq_f32(v_src0, v_src0), vmulq_f32(v_src1, v_src1)));
96
}
97
98
void operator() (const float32x2_t & v_src0, const float32x2_t & v_src1,
99
float32x2_t & v_dst) const
100
{
101
v_dst = internal::vsqrt_f32(vadd_f32(vmul_f32(v_src0, v_src0), vmul_f32(v_src1, v_src1)));
102
}
103
104
void operator() (const f32 * src0, const f32 * src1, f32 * dst) const
105
{
106
dst[0] = sqrtf(src0[0] * src0[0] + src1[0] * src1[0]);
107
}
108
};
109
110
} // namespace
111
112
#endif
113
114
void magnitude(const Size2D &size,
115
const s16 * src0Base, ptrdiff_t src0Stride,
116
const s16 * src1Base, ptrdiff_t src1Stride,
117
s16 * dstBase, ptrdiff_t dstStride)
118
{
119
internal::assertSupportedConfiguration();
120
#ifdef CAROTENE_NEON
121
internal::vtransform(size,
122
src0Base, src0Stride,
123
src1Base, src1Stride,
124
dstBase, dstStride,
125
Magnitude());
126
#else
127
(void)size;
128
(void)src0Base;
129
(void)src0Stride;
130
(void)src1Base;
131
(void)src1Stride;
132
(void)dstBase;
133
(void)dstStride;
134
#endif
135
}
136
137
void magnitude(const Size2D &size,
138
const f32 * src0Base, ptrdiff_t src0Stride,
139
const f32 * src1Base, ptrdiff_t src1Stride,
140
f32 * dstBase, ptrdiff_t dstStride)
141
{
142
internal::assertSupportedConfiguration();
143
#ifdef CAROTENE_NEON
144
internal::vtransform(size,
145
src0Base, src0Stride,
146
src1Base, src1Stride,
147
dstBase, dstStride,
148
MagnitudeF32());
149
#else
150
(void)size;
151
(void)src0Base;
152
(void)src0Stride;
153
(void)src1Base;
154
(void)src1Stride;
155
(void)dstBase;
156
(void)dstStride;
157
#endif
158
}
159
160
} // namespace CAROTENE_NS
161
162