Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/convert_depth.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
42
#include <cstring>
43
44
namespace CAROTENE_NS {
45
46
#ifdef CAROTENE_NEON
47
48
namespace {
49
50
template <int shift>
51
void lshiftConst(const Size2D &size,
52
const u8 * srcBase, ptrdiff_t srcStride,
53
s16 * dstBase, ptrdiff_t dstStride)
54
{
55
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
56
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
57
58
for (size_t i = 0; i < size.height; ++i)
59
{
60
const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
61
s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
62
size_t j = 0;
63
64
for (; j < roiw16; j += 16)
65
{
66
internal::prefetch(src + j);
67
uint8x16_t v_src = vld1q_u8(src + j);
68
int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
69
int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
70
71
vst1q_s16(dst + j, vshlq_n_s16(v_dst0, shift));
72
vst1q_s16(dst + j + 8, vshlq_n_s16(v_dst1, shift));
73
}
74
for (; j < roiw8; j += 8)
75
{
76
int16x8_t v_dst = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));
77
vst1q_s16(dst + j, vshlq_n_s16(v_dst, shift));
78
}
79
80
for (; j < size.width; j++)
81
{
82
dst[j] = ((s16)src[j] << shift);
83
}
84
}
85
}
86
87
template <>
88
void lshiftConst<0>(const Size2D &size,
89
const u8 * srcBase, ptrdiff_t srcStride,
90
s16 * dstBase, ptrdiff_t dstStride)
91
{
92
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
93
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
94
95
for (size_t i = 0; i < size.height; ++i)
96
{
97
const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
98
s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
99
size_t j = 0;
100
101
for (; j < roiw16; j += 16)
102
{
103
internal::prefetch(src + j);
104
uint8x16_t v_src = vld1q_u8(src + j);
105
int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
106
int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
107
108
vst1q_s16(dst + j, v_dst0);
109
vst1q_s16(dst + j + 8, v_dst1);
110
}
111
for (; j < roiw8; j += 8)
112
{
113
int16x8_t v_dst = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));
114
vst1q_s16(dst + j, v_dst);
115
}
116
117
for (; j < size.width; j++)
118
{
119
dst[j] = (s16)src[j];
120
}
121
}
122
}
123
124
template <int shift>
125
void rshiftConst(const Size2D &size,
126
const s16 * srcBase, ptrdiff_t srcStride,
127
u8 * dstBase, ptrdiff_t dstStride,
128
CONVERT_POLICY cpolicy)
129
{
130
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
131
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
132
133
for (size_t i = 0; i < size.height; ++i)
134
{
135
const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
136
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
137
size_t j = 0;
138
139
if (cpolicy == CONVERT_POLICY_SATURATE)
140
{
141
for (; j < roiw16; j += 16)
142
{
143
internal::prefetch(src + j);
144
int16x8_t v_src0 = vshrq_n_s16(vld1q_s16(src + j), shift),
145
v_src1 = vshrq_n_s16(vld1q_s16(src + j + 8), shift);
146
uint8x16_t v_dst = vcombine_u8(vqmovun_s16(v_src0),
147
vqmovun_s16(v_src1));
148
vst1q_u8(dst + j, v_dst);
149
}
150
for (; j < roiw8; j += 8)
151
{
152
int16x8_t v_src = vshrq_n_s16(vld1q_s16(src + j), shift);
153
vst1_u8(dst + j, vqmovun_s16(v_src));
154
}
155
156
for (; j < size.width; j++)
157
{
158
dst[j] = internal::saturate_cast<u8>((src[j] >> shift));
159
}
160
}
161
else // CONVERT_POLICY_WRAP
162
{
163
for (; j < roiw16; j += 16)
164
{
165
internal::prefetch(src + j);
166
int16x8_t v_src0 = vshrq_n_s16(vld1q_s16(src + j), shift),
167
v_src1 = vshrq_n_s16(vld1q_s16(src + j + 8), shift);
168
int8x16_t v_dst = vcombine_s8(vmovn_s16(v_src0),
169
vmovn_s16(v_src1));
170
vst1q_u8(dst + j, vreinterpretq_u8_s8(v_dst));
171
}
172
for (; j < roiw8; j += 8)
173
{
174
int16x8_t v_src = vshrq_n_s16(vld1q_s16(src + j), shift);
175
vst1_u8(dst + j, vreinterpret_u8_s8(vmovn_s16(v_src)));
176
}
177
178
for (; j < size.width; j++)
179
{
180
dst[j] = (u8)((src[j] >> shift));
181
}
182
}
183
}
184
}
185
186
template <>
187
void rshiftConst<0>(const Size2D &size,
188
const s16 * srcBase, ptrdiff_t srcStride,
189
u8 * dstBase, ptrdiff_t dstStride,
190
CONVERT_POLICY cpolicy)
191
{
192
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
193
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
194
195
for (size_t i = 0; i < size.height; ++i)
196
{
197
const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
198
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
199
size_t j = 0;
200
201
if (cpolicy == CONVERT_POLICY_SATURATE)
202
{
203
for (; j < roiw16; j += 16)
204
{
205
internal::prefetch(src + j);
206
int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
207
uint8x16_t v_dst = vcombine_u8(vqmovun_s16(v_src0), vqmovun_s16(v_src1));
208
vst1q_u8(dst + j, v_dst);
209
}
210
for (; j < roiw8; j += 8)
211
{
212
int16x8_t v_src = vld1q_s16(src + j);
213
vst1_u8(dst + j, vqmovun_s16(v_src));
214
}
215
216
for (; j < size.width; j++)
217
{
218
dst[j] = internal::saturate_cast<u8>(src[j]);
219
}
220
}
221
else // CONVERT_POLICY_WRAP
222
{
223
for (; j < roiw16; j += 16)
224
{
225
internal::prefetch(src + j);
226
int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
227
int8x16_t v_dst = vcombine_s8(vmovn_s16(v_src0), vmovn_s16(v_src1));
228
vst1q_u8(dst + j, vreinterpretq_u8_s8(v_dst));
229
}
230
for (; j < roiw8; j += 8)
231
{
232
int16x8_t v_src = vld1q_s16(src + j);
233
vst1_u8(dst + j, vreinterpret_u8_s8(vmovn_s16(v_src)));
234
}
235
236
for (; j < size.width; j++)
237
{
238
dst[j] = (u8)src[j];
239
}
240
}
241
}
242
}
243
244
typedef void (* lshiftConstFunc)(const Size2D &size,
245
const u8 * srcBase, ptrdiff_t srcStride,
246
s16 * dstBase, ptrdiff_t dstStride);
247
248
typedef void (* rshiftConstFunc)(const Size2D &size,
249
const s16 * srcBase, ptrdiff_t srcStride,
250
u8 * dstBase, ptrdiff_t dstStride,
251
CONVERT_POLICY cpolicy);
252
253
} // namespace
254
255
#endif
256
257
void lshift(const Size2D &size,
258
const u8 * srcBase, ptrdiff_t srcStride,
259
s16 * dstBase, ptrdiff_t dstStride,
260
u32 shift)
261
{
262
internal::assertSupportedConfiguration();
263
264
#ifdef CAROTENE_NEON
265
if (shift >= 16u)
266
{
267
for (size_t i = 0; i < size.height; ++i)
268
{
269
s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
270
std::memset(dst, 0, sizeof(s16) * size.width);
271
}
272
return;
273
}
274
275
// this ugly contruction is needed to avoid:
276
// /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant
277
// return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1);
278
279
lshiftConstFunc funcs[16] =
280
{
281
lshiftConst<0>,
282
lshiftConst<1>,
283
lshiftConst<2>,
284
lshiftConst<3>,
285
lshiftConst<4>,
286
lshiftConst<5>,
287
lshiftConst<6>,
288
lshiftConst<7>,
289
lshiftConst<8>,
290
lshiftConst<9>,
291
lshiftConst<10>,
292
lshiftConst<11>,
293
lshiftConst<12>,
294
lshiftConst<13>,
295
lshiftConst<14>,
296
lshiftConst<15>
297
}, func = funcs[shift];
298
299
func(size, srcBase, srcStride, dstBase, dstStride);
300
#else
301
(void)size;
302
(void)srcBase;
303
(void)srcStride;
304
(void)dstBase;
305
(void)dstStride;
306
(void)shift;
307
#endif
308
}
309
310
void rshift(const Size2D &size,
311
const s16 * srcBase, ptrdiff_t srcStride,
312
u8 * dstBase, ptrdiff_t dstStride,
313
u32 shift, CONVERT_POLICY cpolicy)
314
{
315
internal::assertSupportedConfiguration();
316
317
#ifdef CAROTENE_NEON
318
if (shift >= 16)
319
{
320
if (cpolicy == CONVERT_POLICY_WRAP)
321
{
322
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
323
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
324
int16x8_t v_zero = vdupq_n_s16(0);
325
326
for (size_t i = 0; i < size.height; ++i)
327
{
328
const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
329
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
330
size_t j = 0;
331
332
for (; j < roiw16; j += 16)
333
{
334
internal::prefetch(src + j);
335
int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
336
uint8x16_t v_dst = vcombine_u8(vmovn_u16(vcltq_s16(v_src0, v_zero)),
337
vmovn_u16(vcltq_s16(v_src1, v_zero)));
338
vst1q_u8(dst + j, v_dst);
339
}
340
for (; j < roiw8; j += 8)
341
{
342
int16x8_t v_src = vld1q_s16(src + j);
343
vst1_u8(dst + j, vmovn_u16(vcltq_s16(v_src, v_zero)));
344
}
345
346
for (; j < size.width; j++)
347
{
348
dst[j] = src[j] >= 0 ? 0 : 255;
349
}
350
}
351
}
352
else
353
{
354
for (size_t i = 0; i < size.height; ++i)
355
{
356
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
357
std::memset(dst, 0, sizeof(u8) * size.width);
358
}
359
}
360
return;
361
}
362
363
// this ugly contruction is needed to avoid:
364
// /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant
365
// return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1);
366
367
rshiftConstFunc funcs[16] =
368
{
369
rshiftConst<0>,
370
rshiftConst<1>,
371
rshiftConst<2>,
372
rshiftConst<3>,
373
rshiftConst<4>,
374
rshiftConst<5>,
375
rshiftConst<6>,
376
rshiftConst<7>,
377
rshiftConst<8>,
378
rshiftConst<9>,
379
rshiftConst<10>,
380
rshiftConst<11>,
381
rshiftConst<12>,
382
rshiftConst<13>,
383
rshiftConst<14>,
384
rshiftConst<15>
385
}, func = funcs[shift];
386
387
func(size, srcBase, srcStride, dstBase, dstStride, cpolicy);
388
#else
389
(void)size;
390
(void)srcBase;
391
(void)srcStride;
392
(void)dstBase;
393
(void)dstStride;
394
(void)shift;
395
(void)cpolicy;
396
#endif
397
}
398
399
} // namespace CAROTENE_NS
400
401