CoCalc -- add_weighted.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/add_weighted.cpp
16337 views
1
/*
2
 * By downloading, copying, installing or using the software you agree to this license.
3
 * If you do not agree to this license, do not download, install,
4
 * copy or use the software.
5
 *
6
 *
7
 *                           License Agreement
8
 *                For Open Source Computer Vision Library
9
 *                        (3-clause BSD License)
10
 *
11
 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
 * Third party copyrights are property of their respective owners.
13
 *
14
 * Redistribution and use in source and binary forms, with or without modification,
15
 * are permitted provided that the following conditions are met:
16
 *
17
 *   * Redistributions of source code must retain the above copyright notice,
18
 *     this list of conditions and the following disclaimer.
19
 *
20
 *   * Redistributions in binary form must reproduce the above copyright notice,
21
 *     this list of conditions and the following disclaimer in the documentation
22
 *     and/or other materials provided with the distribution.
23
 *
24
 *   * Neither the names of the copyright holders nor the names of the contributors
25
 *     may be used to endorse or promote products derived from this software
26
 *     without specific prior written permission.
27
 *
28
 * This software is provided by the copyright holders and contributors "as is" and
29
 * any express or implied warranties, including, but not limited to, the implied
30
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31
 * In no event shall copyright holders or contributors be liable for any direct,
32
 * indirect, incidental, special, exemplary, or consequential damages
33
 * (including, but not limited to, procurement of substitute goods or services;
34
 * loss of use, data, or profits; or business interruption) however caused
35
 * and on any theory of liability, whether in contract, strict liability,
36
 * or tort (including negligence or otherwise) arising in any way out of
37
 * the use of this software, even if advised of the possibility of such damage.
38
 */
39

40
#include "common.hpp"
41
#include "vtransform.hpp"
42

43
namespace CAROTENE_NS {
44

45
#ifdef CAROTENE_NEON
46

47
namespace {
48

49
using namespace internal;
50

51
template <typename T> struct TypeTraits;
52
template <> struct TypeTraits< u8> { typedef u16 wide;                     typedef  u8 unsign; typedef  uint8x16_t vec128; };
53
template <> struct TypeTraits< s8> { typedef s16 wide;                     typedef  u8 unsign; typedef   int8x16_t vec128; };
54
template <> struct TypeTraits<u16> { typedef u32 wide; typedef  u8 narrow; typedef u16 unsign; typedef  uint16x8_t vec128; };
55
template <> struct TypeTraits<s16> { typedef s32 wide; typedef  s8 narrow; typedef u16 unsign; typedef   int16x8_t vec128; };
56
template <> struct TypeTraits<u32> { typedef u64 wide; typedef u16 narrow; typedef u32 unsign; typedef  uint32x4_t vec128; };
57
template <> struct TypeTraits<s32> { typedef s64 wide; typedef s16 narrow; typedef u32 unsign; typedef   int32x4_t vec128; };
58
template <> struct TypeTraits<f32> { typedef f64 wide;                                         typedef float32x4_t vec128; };
59

60
template <typename T> struct wAdd
61
{
62
    typedef T type;
63

64
    f32 alpha, beta, gamma;
65
    typedef typename TypeTraits<T>::wide wtype;
66
    wAdd<wtype> wideAdd;
67
    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
68
        alpha(_alpha), beta(_beta), gamma(_gamma),
69
        wideAdd(_alpha, _beta, _gamma) {}
70

71
    void operator() (const typename VecTraits<T>::vec128 & v_src0,
72
                     const typename VecTraits<T>::vec128 & v_src1,
73
                     typename VecTraits<T>::vec128 & v_dst) const
74
    {
75
        typename VecTraits<wtype>::vec128 vrl, vrh;
76
        wideAdd(vmovl( vget_low(v_src0)), vmovl( vget_low(v_src1)), vrl);
77
        wideAdd(vmovl(vget_high(v_src0)), vmovl(vget_high(v_src1)), vrh);
78

79
        v_dst = vcombine(vqmovn(vrl), vqmovn(vrh));
80
    }
81

82
    void operator() (const typename VecTraits<T>::vec64 & v_src0,
83
                     const typename VecTraits<T>::vec64 & v_src1,
84
                     typename VecTraits<T>::vec64 & v_dst) const
85
    {
86
        typename VecTraits<wtype>::vec128 vr;
87
        wideAdd(vmovl(v_src0), vmovl(v_src1), vr);
88

89
        v_dst = vqmovn(vr);
90
    }
91

92
    void operator() (const T * src0, const T * src1, T * dst) const
93
    {
94
        dst[0] = saturate_cast<T>(alpha*src0[0] + beta*src1[0] + gamma);
95
    }
96
};
97

98
template <> struct wAdd<s32>
99
{
100
    typedef s32 type;
101

102
    f32 alpha, beta, gamma;
103
    float32x4_t valpha, vbeta, vgamma;
104
    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
105
        alpha(_alpha), beta(_beta), gamma(_gamma)
106
    {
107
        valpha = vdupq_n_f32(_alpha);
108
        vbeta = vdupq_n_f32(_beta);
109
        vgamma = vdupq_n_f32(_gamma + 0.5);
110
    }
111

112
    void operator() (const typename VecTraits<s32>::vec128 & v_src0,
113
                     const typename VecTraits<s32>::vec128 & v_src1,
114
                     typename VecTraits<s32>::vec128 & v_dst) const
115
    {
116
        float32x4_t vs1 = vcvtq_f32_s32(v_src0);
117
        float32x4_t vs2 = vcvtq_f32_s32(v_src1);
118

119
        vs1 = vmlaq_f32(vgamma, vs1, valpha);
120
        vs1 = vmlaq_f32(vs1, vs2, vbeta);
121
        v_dst = vcvtq_s32_f32(vs1);
122
    }
123

124
    void operator() (const typename VecTraits<s32>::vec64 & v_src0,
125
                     const typename VecTraits<s32>::vec64 & v_src1,
126
                     typename VecTraits<s32>::vec64 & v_dst) const
127
    {
128
        float32x2_t vs1 = vcvt_f32_s32(v_src0);
129
        float32x2_t vs2 = vcvt_f32_s32(v_src1);
130

131
        vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
132
        vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
133
        v_dst = vcvt_s32_f32(vs1);
134
    }
135

136
    void operator() (const s32 * src0, const s32 * src1, s32 * dst) const
137
    {
138
        dst[0] = saturate_cast<s32>(alpha*src0[0] + beta*src1[0] + gamma);
139
    }
140
};
141

142
template <> struct wAdd<u32>
143
{
144
    typedef u32 type;
145

146
    f32 alpha, beta, gamma;
147
    float32x4_t valpha, vbeta, vgamma;
148
    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
149
        alpha(_alpha), beta(_beta), gamma(_gamma)
150
    {
151
        valpha = vdupq_n_f32(_alpha);
152
        vbeta = vdupq_n_f32(_beta);
153
        vgamma = vdupq_n_f32(_gamma + 0.5);
154
    }
155

156
    void operator() (const typename VecTraits<u32>::vec128 & v_src0,
157
                     const typename VecTraits<u32>::vec128 & v_src1,
158
                     typename VecTraits<u32>::vec128 & v_dst) const
159
    {
160
        float32x4_t vs1 = vcvtq_f32_u32(v_src0);
161
        float32x4_t vs2 = vcvtq_f32_u32(v_src1);
162

163
        vs1 = vmlaq_f32(vgamma, vs1, valpha);
164
        vs1 = vmlaq_f32(vs1, vs2, vbeta);
165
        v_dst = vcvtq_u32_f32(vs1);
166
    }
167

168
    void operator() (const typename VecTraits<u32>::vec64 & v_src0,
169
                     const typename VecTraits<u32>::vec64 & v_src1,
170
                     typename VecTraits<u32>::vec64 & v_dst) const
171
    {
172
        float32x2_t vs1 = vcvt_f32_u32(v_src0);
173
        float32x2_t vs2 = vcvt_f32_u32(v_src1);
174

175
        vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
176
        vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
177
        v_dst = vcvt_u32_f32(vs1);
178
    }
179

180
    void operator() (const u32 * src0, const u32 * src1, u32 * dst) const
181
    {
182
        dst[0] = saturate_cast<u32>(alpha*src0[0] + beta*src1[0] + gamma);
183
    }
184
};
185

186
template <> struct wAdd<f32>
187
{
188
    typedef f32 type;
189

190
    f32 alpha, beta, gamma;
191
    float32x4_t valpha, vbeta, vgamma;
192
    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
193
        alpha(_alpha), beta(_beta), gamma(_gamma)
194
    {
195
        valpha = vdupq_n_f32(_alpha);
196
        vbeta = vdupq_n_f32(_beta);
197
        vgamma = vdupq_n_f32(_gamma + 0.5);
198
    }
199

200
    void operator() (const typename VecTraits<f32>::vec128 & v_src0,
201
                     const typename VecTraits<f32>::vec128 & v_src1,
202
                     typename VecTraits<f32>::vec128 & v_dst) const
203
    {
204
        float32x4_t vs1 = vmlaq_f32(vgamma, v_src0, valpha);
205
        v_dst = vmlaq_f32(vs1, v_src1, vbeta);
206
    }
207

208
    void operator() (const typename VecTraits<f32>::vec64 & v_src0,
209
                     const typename VecTraits<f32>::vec64 & v_src1,
210
                     typename VecTraits<f32>::vec64 & v_dst) const
211
    {
212
        float32x2_t vs1 = vmla_f32(vget_low(vgamma), v_src0, vget_low(valpha));
213
        v_dst = vmla_f32(vs1, v_src1, vget_low(vbeta));
214

215
    }
216

217
    void operator() (const f32 * src0, const f32 * src1, f32 * dst) const
218
    {
219
        dst[0] = alpha*src0[0] + beta*src1[0] + gamma;
220
    }
221
};
222

223
} // namespace
224

225
// NEON build: expands to the addWeighted() overload for one element type.
// The generated function checks the configuration, builds a wAdd<type>
// functor from the three coefficients and passes it to internal::vtransform
// together with the size, both source pointers/strides and the destination
// pointer/stride.
#define IMPL_ADDWEIGHTED(type)                                        \
void addWeighted(const Size2D &size,                                  \
                 const type * src0Base, ptrdiff_t src0Stride,         \
                 const type * src1Base, ptrdiff_t src1Stride,         \
                 type * dstBase, ptrdiff_t dstStride,                 \
                 f32 alpha, f32 beta, f32 gamma)                      \
{                                                                     \
    internal::assertSupportedConfiguration();                         \
    wAdd<type> weightedSum(alpha, beta, gamma);                       \
    internal::vtransform(size,                                        \
                         src0Base, src0Stride,                        \
                         src1Base, src1Stride,                        \
                         dstBase, dstStride,                          \
                         weightedSum);                                \
}
242

243
#else
244

245
// Build without CAROTENE_NEON: expands to an addWeighted() overload with the
// same parameter list, all parameters unnamed and unused. The body only calls
// internal::assertSupportedConfiguration().
#define IMPL_ADDWEIGHTED(type)                        \
void addWeighted(const Size2D &,                      \
                 const type *, ptrdiff_t,             \
                 const type *, ptrdiff_t,             \
                 type *, ptrdiff_t,                   \
                 f32, f32, f32)                       \
{                                                     \
    internal::assertSupportedConfiguration();         \
}
254

255
#endif
256

257
// Emit one addWeighted() overload per supported element type, using whichever
// IMPL_ADDWEIGHTED definition was selected above.

// unsigned integer element types
IMPL_ADDWEIGHTED(u8)
IMPL_ADDWEIGHTED(u16)
IMPL_ADDWEIGHTED(u32)

// signed integer element types
IMPL_ADDWEIGHTED(s8)
IMPL_ADDWEIGHTED(s16)
IMPL_ADDWEIGHTED(s32)

// floating-point element type
IMPL_ADDWEIGHTED(f32)
264

265
} // namespace CAROTENE_NS
266

267
Product

Resources

Company