Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/add_weighted.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
#include "vtransform.hpp"
42
43
namespace CAROTENE_NS {
44
45
#ifdef CAROTENE_NEON
46
47
namespace {
48
49
using namespace internal;
50
51
template <typename T> struct TypeTraits;
52
template <> struct TypeTraits< u8> { typedef u16 wide; typedef u8 unsign; typedef uint8x16_t vec128; };
53
template <> struct TypeTraits< s8> { typedef s16 wide; typedef u8 unsign; typedef int8x16_t vec128; };
54
template <> struct TypeTraits<u16> { typedef u32 wide; typedef u8 narrow; typedef u16 unsign; typedef uint16x8_t vec128; };
55
template <> struct TypeTraits<s16> { typedef s32 wide; typedef s8 narrow; typedef u16 unsign; typedef int16x8_t vec128; };
56
template <> struct TypeTraits<u32> { typedef u64 wide; typedef u16 narrow; typedef u32 unsign; typedef uint32x4_t vec128; };
57
template <> struct TypeTraits<s32> { typedef s64 wide; typedef s16 narrow; typedef u32 unsign; typedef int32x4_t vec128; };
58
template <> struct TypeTraits<f32> { typedef f64 wide; typedef float32x4_t vec128; };
59
60
template <typename T> struct wAdd
61
{
62
typedef T type;
63
64
f32 alpha, beta, gamma;
65
typedef typename TypeTraits<T>::wide wtype;
66
wAdd<wtype> wideAdd;
67
wAdd(f32 _alpha, f32 _beta, f32 _gamma):
68
alpha(_alpha), beta(_beta), gamma(_gamma),
69
wideAdd(_alpha, _beta, _gamma) {}
70
71
void operator() (const typename VecTraits<T>::vec128 & v_src0,
72
const typename VecTraits<T>::vec128 & v_src1,
73
typename VecTraits<T>::vec128 & v_dst) const
74
{
75
typename VecTraits<wtype>::vec128 vrl, vrh;
76
wideAdd(vmovl( vget_low(v_src0)), vmovl( vget_low(v_src1)), vrl);
77
wideAdd(vmovl(vget_high(v_src0)), vmovl(vget_high(v_src1)), vrh);
78
79
v_dst = vcombine(vqmovn(vrl), vqmovn(vrh));
80
}
81
82
void operator() (const typename VecTraits<T>::vec64 & v_src0,
83
const typename VecTraits<T>::vec64 & v_src1,
84
typename VecTraits<T>::vec64 & v_dst) const
85
{
86
typename VecTraits<wtype>::vec128 vr;
87
wideAdd(vmovl(v_src0), vmovl(v_src1), vr);
88
89
v_dst = vqmovn(vr);
90
}
91
92
void operator() (const T * src0, const T * src1, T * dst) const
93
{
94
dst[0] = saturate_cast<T>(alpha*src0[0] + beta*src1[0] + gamma);
95
}
96
};
97
98
template <> struct wAdd<s32>
99
{
100
typedef s32 type;
101
102
f32 alpha, beta, gamma;
103
float32x4_t valpha, vbeta, vgamma;
104
wAdd(f32 _alpha, f32 _beta, f32 _gamma):
105
alpha(_alpha), beta(_beta), gamma(_gamma)
106
{
107
valpha = vdupq_n_f32(_alpha);
108
vbeta = vdupq_n_f32(_beta);
109
vgamma = vdupq_n_f32(_gamma + 0.5);
110
}
111
112
void operator() (const typename VecTraits<s32>::vec128 & v_src0,
113
const typename VecTraits<s32>::vec128 & v_src1,
114
typename VecTraits<s32>::vec128 & v_dst) const
115
{
116
float32x4_t vs1 = vcvtq_f32_s32(v_src0);
117
float32x4_t vs2 = vcvtq_f32_s32(v_src1);
118
119
vs1 = vmlaq_f32(vgamma, vs1, valpha);
120
vs1 = vmlaq_f32(vs1, vs2, vbeta);
121
v_dst = vcvtq_s32_f32(vs1);
122
}
123
124
void operator() (const typename VecTraits<s32>::vec64 & v_src0,
125
const typename VecTraits<s32>::vec64 & v_src1,
126
typename VecTraits<s32>::vec64 & v_dst) const
127
{
128
float32x2_t vs1 = vcvt_f32_s32(v_src0);
129
float32x2_t vs2 = vcvt_f32_s32(v_src1);
130
131
vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
132
vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
133
v_dst = vcvt_s32_f32(vs1);
134
}
135
136
void operator() (const s32 * src0, const s32 * src1, s32 * dst) const
137
{
138
dst[0] = saturate_cast<s32>(alpha*src0[0] + beta*src1[0] + gamma);
139
}
140
};
141
142
template <> struct wAdd<u32>
143
{
144
typedef u32 type;
145
146
f32 alpha, beta, gamma;
147
float32x4_t valpha, vbeta, vgamma;
148
wAdd(f32 _alpha, f32 _beta, f32 _gamma):
149
alpha(_alpha), beta(_beta), gamma(_gamma)
150
{
151
valpha = vdupq_n_f32(_alpha);
152
vbeta = vdupq_n_f32(_beta);
153
vgamma = vdupq_n_f32(_gamma + 0.5);
154
}
155
156
void operator() (const typename VecTraits<u32>::vec128 & v_src0,
157
const typename VecTraits<u32>::vec128 & v_src1,
158
typename VecTraits<u32>::vec128 & v_dst) const
159
{
160
float32x4_t vs1 = vcvtq_f32_u32(v_src0);
161
float32x4_t vs2 = vcvtq_f32_u32(v_src1);
162
163
vs1 = vmlaq_f32(vgamma, vs1, valpha);
164
vs1 = vmlaq_f32(vs1, vs2, vbeta);
165
v_dst = vcvtq_u32_f32(vs1);
166
}
167
168
void operator() (const typename VecTraits<u32>::vec64 & v_src0,
169
const typename VecTraits<u32>::vec64 & v_src1,
170
typename VecTraits<u32>::vec64 & v_dst) const
171
{
172
float32x2_t vs1 = vcvt_f32_u32(v_src0);
173
float32x2_t vs2 = vcvt_f32_u32(v_src1);
174
175
vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
176
vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
177
v_dst = vcvt_u32_f32(vs1);
178
}
179
180
void operator() (const u32 * src0, const u32 * src1, u32 * dst) const
181
{
182
dst[0] = saturate_cast<u32>(alpha*src0[0] + beta*src1[0] + gamma);
183
}
184
};
185
186
template <> struct wAdd<f32>
187
{
188
typedef f32 type;
189
190
f32 alpha, beta, gamma;
191
float32x4_t valpha, vbeta, vgamma;
192
wAdd(f32 _alpha, f32 _beta, f32 _gamma):
193
alpha(_alpha), beta(_beta), gamma(_gamma)
194
{
195
valpha = vdupq_n_f32(_alpha);
196
vbeta = vdupq_n_f32(_beta);
197
vgamma = vdupq_n_f32(_gamma + 0.5);
198
}
199
200
void operator() (const typename VecTraits<f32>::vec128 & v_src0,
201
const typename VecTraits<f32>::vec128 & v_src1,
202
typename VecTraits<f32>::vec128 & v_dst) const
203
{
204
float32x4_t vs1 = vmlaq_f32(vgamma, v_src0, valpha);
205
v_dst = vmlaq_f32(vs1, v_src1, vbeta);
206
}
207
208
void operator() (const typename VecTraits<f32>::vec64 & v_src0,
209
const typename VecTraits<f32>::vec64 & v_src1,
210
typename VecTraits<f32>::vec64 & v_dst) const
211
{
212
float32x2_t vs1 = vmla_f32(vget_low(vgamma), v_src0, vget_low(valpha));
213
v_dst = vmla_f32(vs1, v_src1, vget_low(vbeta));
214
215
}
216
217
void operator() (const f32 * src0, const f32 * src1, f32 * dst) const
218
{
219
dst[0] = alpha*src0[0] + beta*src1[0] + gamma;
220
}
221
};
222
223
} // namespace
224
225
// NEON build: defines the addWeighted() overload for element type `type`.
// The generated function checks the runtime configuration, builds the
// matching wAdd<type> functor from the three weights and passes it, together
// with the two sources and the destination (base pointer + stride each), to
// internal::vtransform.
#define IMPL_ADDWEIGHTED(type)                                         \
void addWeighted(const Size2D &size,                                   \
                 const type * src0Base, ptrdiff_t src0Stride,          \
                 const type * src1Base, ptrdiff_t src1Stride,          \
                 type * dstBase, ptrdiff_t dstStride,                  \
                 f32 alpha, f32 beta, f32 gamma)                       \
{                                                                      \
    internal::assertSupportedConfiguration();                          \
    wAdd<type> weightedAdd(alpha, beta, gamma);                        \
    internal::vtransform(size,                                         \
                         src0Base, src0Stride,                         \
                         src1Base, src1Stride,                         \
                         dstBase, dstStride,                           \
                         weightedAdd);                                 \
}
242
243
#else
244
245
// Non-NEON build: defines an addWeighted() overload for element type `type`
// with the same signature as the NEON variant. All parameters are unnamed and
// unused; the body only calls internal::assertSupportedConfiguration().
#define IMPL_ADDWEIGHTED(type)                                         \
void addWeighted(const Size2D &,                                       \
                 const type *, ptrdiff_t,                              \
                 const type *, ptrdiff_t,                              \
                 type *, ptrdiff_t,                                    \
                 f32, f32, f32)                                        \
{                                                                      \
    internal::assertSupportedConfiguration();                          \
}
254
255
#endif
256
257
// Emit one addWeighted() overload per supported element type, using whichever
// IMPL_ADDWEIGHTED variant was selected above.
IMPL_ADDWEIGHTED(u8)
IMPL_ADDWEIGHTED(s8)
IMPL_ADDWEIGHTED(u16)
IMPL_ADDWEIGHTED(s16)
IMPL_ADDWEIGHTED(u32)
IMPL_ADDWEIGHTED(s32)
IMPL_ADDWEIGHTED(f32)
264
265
} // namespace CAROTENE_NS
266
267