Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/add.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
#include "vtransform.hpp"
42
43
namespace CAROTENE_NS {
44
45
#ifdef CAROTENE_NEON
46
47
namespace {
48
49
template <typename T, typename WT>
50
struct AddWrap
51
{
52
typedef T type;
53
54
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
55
const typename internal::VecTraits<T>::vec128 & v_src1,
56
typename internal::VecTraits<T>::vec128 & v_dst) const
57
{
58
v_dst = internal::vaddq(v_src0, v_src1);
59
}
60
61
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
62
const typename internal::VecTraits<T>::vec64 & v_src1,
63
typename internal::VecTraits<T>::vec64 & v_dst) const
64
{
65
v_dst = internal::vadd(v_src0, v_src1);
66
}
67
68
void operator() (const T * src0, const T * src1, T * dst) const
69
{
70
dst[0] = (T)((WT)src0[0] + (WT)src1[0]);
71
}
72
};
73
74
template <typename T, typename WT>
75
struct AddSaturate
76
{
77
typedef T type;
78
79
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
80
const typename internal::VecTraits<T>::vec128 & v_src1,
81
typename internal::VecTraits<T>::vec128 & v_dst) const
82
{
83
v_dst = internal::vqaddq(v_src0, v_src1);
84
}
85
86
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
87
const typename internal::VecTraits<T>::vec64 & v_src1,
88
typename internal::VecTraits<T>::vec64 & v_dst) const
89
{
90
v_dst = internal::vqadd(v_src0, v_src1);
91
}
92
93
void operator() (const T * src0, const T * src1, T * dst) const
94
{
95
dst[0] = internal::saturate_cast<T>((WT)src0[0] + (WT)src1[0]);
96
}
97
};
98
99
} // namespace
100
101
#endif
102
103
void add(const Size2D &size,
104
const u8 * src0Base, ptrdiff_t src0Stride,
105
const u8 * src1Base, ptrdiff_t src1Stride,
106
u8 *dstBase, ptrdiff_t dstStride,
107
CONVERT_POLICY policy)
108
{
109
internal::assertSupportedConfiguration();
110
#ifdef CAROTENE_NEON
111
if (policy == CONVERT_POLICY_SATURATE)
112
{
113
internal::vtransform(size,
114
src0Base, src0Stride,
115
src1Base, src1Stride,
116
dstBase, dstStride,
117
AddSaturate<u8, u16>());
118
}
119
else
120
{
121
internal::vtransform(size,
122
src0Base, src0Stride,
123
src1Base, src1Stride,
124
dstBase, dstStride,
125
AddWrap<u8, u16>());
126
}
127
#else
128
(void)size;
129
(void)src0Base;
130
(void)src0Stride;
131
(void)src1Base;
132
(void)src1Stride;
133
(void)dstBase;
134
(void)dstStride;
135
(void)policy;
136
#endif
137
}
138
139
void add(const Size2D &size,
140
const s8 * src0Base, ptrdiff_t src0Stride,
141
const s8 * src1Base, ptrdiff_t src1Stride,
142
s8 *dstBase, ptrdiff_t dstStride,
143
CONVERT_POLICY policy)
144
{
145
internal::assertSupportedConfiguration();
146
#ifdef CAROTENE_NEON
147
if (policy == CONVERT_POLICY_SATURATE)
148
{
149
internal::vtransform(size,
150
src0Base, src0Stride,
151
src1Base, src1Stride,
152
dstBase, dstStride,
153
AddSaturate<s8, s16>());
154
}
155
else
156
{
157
internal::vtransform(size,
158
src0Base, src0Stride,
159
src1Base, src1Stride,
160
dstBase, dstStride,
161
AddWrap<s8, s16>());
162
}
163
#else
164
(void)size;
165
(void)src0Base;
166
(void)src0Stride;
167
(void)src1Base;
168
(void)src1Stride;
169
(void)dstBase;
170
(void)dstStride;
171
(void)policy;
172
#endif
173
}
174
175
void add(const Size2D &size,
176
const u8 * src0Base, ptrdiff_t src0Stride,
177
const u8 * src1Base, ptrdiff_t src1Stride,
178
s16 *dstBase, ptrdiff_t dstStride,
179
CONVERT_POLICY)
180
{
181
internal::assertSupportedConfiguration();
182
#ifdef CAROTENE_NEON
183
size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
184
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
185
186
for (size_t i = 0; i < size.height; ++i)
187
{
188
const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
189
const u8 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
190
u16 * dst = internal::getRowPtr((u16 *)dstBase, dstStride, i);
191
size_t j = 0;
192
193
for (; j < roiw32; j += 32)
194
{
195
internal::prefetch(src0 + j);
196
internal::prefetch(src1 + j);
197
uint8x16_t v_src00 = vld1q_u8(src0 + j), v_src01 = vld1q_u8(src0 + j + 16);
198
uint8x16_t v_src10 = vld1q_u8(src1 + j), v_src11 = vld1q_u8(src1 + j + 16);
199
vst1q_u16(dst + j, vaddl_u8(vget_low_u8(v_src00), vget_low_u8(v_src10)));
200
vst1q_u16(dst + j + 8, vaddl_u8(vget_high_u8(v_src00), vget_high_u8(v_src10)));
201
vst1q_u16(dst + j + 16, vaddl_u8(vget_low_u8(v_src01), vget_low_u8(v_src11)));
202
vst1q_u16(dst + j + 24, vaddl_u8(vget_high_u8(v_src01), vget_high_u8(v_src11)));
203
}
204
for (; j < roiw8; j += 8)
205
{
206
uint8x8_t v_src0 = vld1_u8(src0 + j);
207
uint8x8_t v_src1 = vld1_u8(src1 + j);
208
vst1q_u16(dst + j, vaddl_u8(v_src0, v_src1));
209
}
210
211
for (; j < size.width; j++)
212
dst[j] = (u16)src0[j] + (u16)src1[j];
213
}
214
#else
215
(void)size;
216
(void)src0Base;
217
(void)src0Stride;
218
(void)src1Base;
219
(void)src1Stride;
220
(void)dstBase;
221
(void)dstStride;
222
#endif
223
}
224
225
void add(const Size2D &size,
226
const u8 * src0Base, ptrdiff_t src0Stride,
227
const s16 * src1Base, ptrdiff_t src1Stride,
228
s16 *dstBase, ptrdiff_t dstStride,
229
CONVERT_POLICY policy)
230
{
231
internal::assertSupportedConfiguration();
232
#ifdef CAROTENE_NEON
233
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
234
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
235
236
for (size_t i = 0; i < size.height; ++i)
237
{
238
const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
239
const s16 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
240
s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
241
size_t j = 0;
242
243
if (policy == CONVERT_POLICY_SATURATE)
244
{
245
for (; j < roiw16; j += 16)
246
{
247
internal::prefetch(src0 + j);
248
internal::prefetch(src1 + j);
249
uint8x16_t v_src0 = vld1q_u8(src0 + j);
250
int16x8_t v_src00 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src0)));
251
int16x8_t v_src01 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src0)));
252
int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
253
int16x8_t v_dst0 = vqaddq_s16(v_src00, v_src10);
254
int16x8_t v_dst1 = vqaddq_s16(v_src01, v_src11);
255
vst1q_s16(dst + j, v_dst0);
256
vst1q_s16(dst + j + 8, v_dst1);
257
}
258
for (; j < roiw8; j += 8)
259
{
260
int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src0 + j)));
261
int16x8_t v_src1 = vld1q_s16(src1 + j);
262
int16x8_t v_dst = vqaddq_s16(v_src0, v_src1);
263
vst1q_s16(dst + j, v_dst);
264
}
265
266
for (; j < size.width; j++)
267
dst[j] = internal::saturate_cast<s16>((s32)src0[j] + (s32)src1[j]);
268
}
269
else
270
{
271
for (; j < roiw16; j += 16)
272
{
273
internal::prefetch(src0 + j);
274
internal::prefetch(src1 + j);
275
uint8x16_t v_src0 = vld1q_u8(src0 + j);
276
int16x8_t v_src00 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src0)));
277
int16x8_t v_src01 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src0)));
278
int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
279
int16x8_t v_dst0 = vaddq_s16(v_src00, v_src10);
280
int16x8_t v_dst1 = vaddq_s16(v_src01, v_src11);
281
vst1q_s16(dst + j, v_dst0);
282
vst1q_s16(dst + j + 8, v_dst1);
283
}
284
for (; j < roiw8; j += 8)
285
{
286
int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src0 + j)));
287
int16x8_t v_src1 = vld1q_s16(src1 + j);
288
int16x8_t v_dst = vaddq_s16(v_src0, v_src1);
289
vst1q_s16(dst + j, v_dst);
290
}
291
292
for (; j < size.width; j++)
293
dst[j] = (s16)((s32)src0[j] + (s32)src1[j]);
294
}
295
}
296
#else
297
(void)size;
298
(void)src0Base;
299
(void)src0Stride;
300
(void)src1Base;
301
(void)src1Stride;
302
(void)dstBase;
303
(void)dstStride;
304
(void)policy;
305
#endif
306
}
307
308
void add(const Size2D &size,
309
const s16 * src0Base, ptrdiff_t src0Stride,
310
const s16 * src1Base, ptrdiff_t src1Stride,
311
s16 *dstBase, ptrdiff_t dstStride,
312
CONVERT_POLICY policy)
313
{
314
internal::assertSupportedConfiguration();
315
#ifdef CAROTENE_NEON
316
if (policy == CONVERT_POLICY_SATURATE)
317
{
318
internal::vtransform(size,
319
src0Base, src0Stride,
320
src1Base, src1Stride,
321
dstBase, dstStride,
322
AddSaturate<s16, s32>());
323
}
324
else
325
{
326
internal::vtransform(size,
327
src0Base, src0Stride,
328
src1Base, src1Stride,
329
dstBase, dstStride,
330
AddWrap<s16, s32>());
331
}
332
#else
333
(void)size;
334
(void)src0Base;
335
(void)src0Stride;
336
(void)src1Base;
337
(void)src1Stride;
338
(void)dstBase;
339
(void)dstStride;
340
(void)policy;
341
#endif
342
}
343
344
void add(const Size2D &size,
345
const u16 * src0Base, ptrdiff_t src0Stride,
346
const u16 * src1Base, ptrdiff_t src1Stride,
347
u16 * dstBase, ptrdiff_t dstStride,
348
CONVERT_POLICY policy)
349
{
350
internal::assertSupportedConfiguration();
351
#ifdef CAROTENE_NEON
352
if (policy == CONVERT_POLICY_SATURATE)
353
{
354
internal::vtransform(size,
355
src0Base, src0Stride,
356
src1Base, src1Stride,
357
dstBase, dstStride,
358
AddSaturate<u16, u32>());
359
}
360
else
361
{
362
internal::vtransform(size,
363
src0Base, src0Stride,
364
src1Base, src1Stride,
365
dstBase, dstStride,
366
AddWrap<u16, u32>());
367
}
368
#else
369
(void)size;
370
(void)src0Base;
371
(void)src0Stride;
372
(void)src1Base;
373
(void)src1Stride;
374
(void)dstBase;
375
(void)dstStride;
376
(void)policy;
377
#endif
378
}
379
380
void add(const Size2D &size,
381
const s32 * src0Base, ptrdiff_t src0Stride,
382
const s32 * src1Base, ptrdiff_t src1Stride,
383
s32 *dstBase, ptrdiff_t dstStride,
384
CONVERT_POLICY policy)
385
{
386
internal::assertSupportedConfiguration();
387
#ifdef CAROTENE_NEON
388
if (policy == CONVERT_POLICY_SATURATE)
389
{
390
internal::vtransform(size,
391
src0Base, src0Stride,
392
src1Base, src1Stride,
393
dstBase, dstStride,
394
AddSaturate<s32, s64>());
395
}
396
else
397
{
398
internal::vtransform(size,
399
src0Base, src0Stride,
400
src1Base, src1Stride,
401
dstBase, dstStride,
402
AddWrap<s32, s64>());
403
}
404
#else
405
(void)size;
406
(void)src0Base;
407
(void)src0Stride;
408
(void)src1Base;
409
(void)src1Stride;
410
(void)dstBase;
411
(void)dstStride;
412
(void)policy;
413
#endif
414
}
415
416
void add(const Size2D &size,
417
const u32 * src0Base, ptrdiff_t src0Stride,
418
const u32 * src1Base, ptrdiff_t src1Stride,
419
u32 * dstBase, ptrdiff_t dstStride,
420
CONVERT_POLICY policy)
421
{
422
internal::assertSupportedConfiguration();
423
#ifdef CAROTENE_NEON
424
if (policy == CONVERT_POLICY_SATURATE)
425
{
426
internal::vtransform(size,
427
src0Base, src0Stride,
428
src1Base, src1Stride,
429
dstBase, dstStride,
430
AddSaturate<u32, u64>());
431
}
432
else
433
{
434
internal::vtransform(size,
435
src0Base, src0Stride,
436
src1Base, src1Stride,
437
dstBase, dstStride,
438
AddWrap<u32, u64>());
439
}
440
#else
441
(void)size;
442
(void)src0Base;
443
(void)src0Stride;
444
(void)src1Base;
445
(void)src1Stride;
446
(void)dstBase;
447
(void)dstStride;
448
(void)policy;
449
#endif
450
}
451
452
void add(const Size2D &size,
453
const f32 * src0Base, ptrdiff_t src0Stride,
454
const f32 * src1Base, ptrdiff_t src1Stride,
455
f32 * dstBase, ptrdiff_t dstStride)
456
{
457
internal::assertSupportedConfiguration();
458
#ifdef CAROTENE_NEON
459
internal::vtransform(size,
460
src0Base, src0Stride,
461
src1Base, src1Stride,
462
dstBase, dstStride,
463
AddWrap<f32, f32>());
464
#else
465
(void)size;
466
(void)src0Base;
467
(void)src0Stride;
468
(void)src1Base;
469
(void)src1Stride;
470
(void)dstBase;
471
(void)dstStride;
472
#endif
473
}
474
475
} // namespace CAROTENE_NS
476
477