Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/cmp.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
#include "vtransform.hpp"
42
43
namespace CAROTENE_NS {
44
45
#ifdef CAROTENE_NEON
46
47
namespace {
48
49
// Stores two 128-bit vectors of u8 results back to back:
// v1 at dst and v2 at dst + 16.
inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2)
{
    u8* const upperHalf = dst + 16;
    vst1q_u8(dst, v1);
    vst1q_u8(upperHalf, v2);
}
50
// Narrows every 16-bit lane of v1 and v2 to 8 bits (vmovn_u16) and stores the
// combined 16 bytes at dst, v1's lanes first.
inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2)
{
    const uint8x8_t narrowedFirst  = vmovn_u16(v1);
    const uint8x8_t narrowedSecond = vmovn_u16(v2);
    vst1q_u8(dst, vcombine_u8(narrowedFirst, narrowedSecond));
}
51
// Narrows every 32-bit lane of v1 and v2 to 16 bits, then to 8 bits, and
// stores the resulting 8 bytes at dst, v1's lanes first.
inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2)
{
    const uint16x4_t halfFirst  = vmovn_u32(v1);
    const uint16x4_t halfSecond = vmovn_u32(v2);
    const uint16x8_t joined     = vcombine_u16(halfFirst, halfSecond);
    vst1_u8(dst, vmovn_u16(joined));
}
52
53
template <typename Op, int elsize> struct vtail
54
{
55
static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
56
u8 * dst, const Op & op,
57
size_t &x, size_t width)
58
{
59
//do nothing since there couldn't be enough data
60
(void)src0;
61
(void)src1;
62
(void)dst;
63
(void)op;
64
(void)x;
65
(void)width;
66
}
67
};
68
template <typename Op> struct vtail<Op, 2>
69
{
70
static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
71
u8 * dst, const Op & op,
72
size_t &x, size_t width)
73
{
74
typedef typename Op::type type;
75
typedef typename internal::VecTraits<type>::vec128 vec128;
76
typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
77
//There no more than 15 elements in the tail, so we could handle 8 element vector only once
78
if( x + 8 < width)
79
{
80
vec128 v_src0, v_src1;
81
uvec128 v_dst;
82
83
v_src0 = internal::vld1q(src0 + x);
84
v_src1 = internal::vld1q(src1 + x);
85
op(v_src0, v_src1, v_dst);
86
internal::vst1(dst + x, internal::vmovn(v_dst));
87
x+=8;
88
}
89
}
90
};
91
template <typename Op> struct vtail<Op, 1>
92
{
93
static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
94
u8 * dst, const Op & op,
95
size_t &x, size_t width)
96
{
97
typedef typename Op::type type;
98
typedef typename internal::VecTraits<type>::vec128 vec128;
99
typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
100
typedef typename internal::VecTraits<type>::vec64 vec64;
101
typedef typename internal::VecTraits<type>::unsign::vec64 uvec64;
102
//There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
103
if( x + 16 < width)
104
{
105
vec128 v_src0, v_src1;
106
uvec128 v_dst;
107
108
v_src0 = internal::vld1q(src0 + x);
109
v_src1 = internal::vld1q(src1 + x);
110
op(v_src0, v_src1, v_dst);
111
internal::vst1q(dst + x, v_dst);
112
x+=16;
113
}
114
if( x + 8 < width)
115
{
116
vec64 v_src0, v_src1;
117
uvec64 v_dst;
118
119
v_src0 = internal::vld1(src0 + x);
120
v_src1 = internal::vld1(src1 + x);
121
op(v_src0, v_src1, v_dst);
122
internal::vst1(dst + x, v_dst);
123
x+=8;
124
}
125
}
126
};
127
128
template <typename Op>
129
void vcompare(Size2D size,
130
const typename Op::type * src0Base, ptrdiff_t src0Stride,
131
const typename Op::type * src1Base, ptrdiff_t src1Stride,
132
u8 * dstBase, ptrdiff_t dstStride, const Op & op)
133
{
134
typedef typename Op::type type;
135
typedef typename internal::VecTraits<type>::vec128 vec128;
136
typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
137
138
if (src0Stride == src1Stride && src0Stride == dstStride &&
139
src0Stride == (ptrdiff_t)(size.width * sizeof(type)))
140
{
141
size.width *= size.height;
142
size.height = 1;
143
}
144
145
const u32 step_base = 32 / sizeof(type);
146
size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
147
148
for (size_t y = 0; y < size.height; ++y)
149
{
150
const type * src0 = internal::getRowPtr(src0Base, src0Stride, y);
151
const type * src1 = internal::getRowPtr(src1Base, src1Stride, y);
152
u8 * dst = internal::getRowPtr(dstBase, dstStride, y);
153
size_t x = 0;
154
155
for( ; x < roiw_base; x += step_base )
156
{
157
internal::prefetch(src0 + x);
158
internal::prefetch(src1 + x);
159
160
vec128 v_src00 = internal::vld1q(src0 + x), v_src01 = internal::vld1q(src0 + x + 16 / sizeof(type));
161
vec128 v_src10 = internal::vld1q(src1 + x), v_src11 = internal::vld1q(src1 + x + 16 / sizeof(type));
162
uvec128 v_dst0;
163
uvec128 v_dst1;
164
165
op(v_src00, v_src10, v_dst0);
166
op(v_src01, v_src11, v_dst1);
167
168
vnst(dst + x, v_dst0, v_dst1);
169
}
170
171
vtail<Op, sizeof(type)>::compare(src0, src1, dst, op, x, size.width);
172
173
for (; x < size.width; ++x)
174
{
175
op(src0 + x, src1 + x, dst + x);
176
}
177
}
178
}
179
180
template<typename T>
181
struct OpCmpEQ
182
{
183
typedef T type;
184
185
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
186
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
187
{
188
v_dst = internal::vceqq(v_src0, v_src1);
189
}
190
191
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
192
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
193
{
194
v_dst = internal::vceq(v_src0, v_src1);
195
}
196
197
void operator() (const T * src0, const T * src1, u8 * dst) const
198
{
199
dst[0] = src0[0] == src1[0] ? 255 : 0;
200
}
201
};
202
203
template<typename T>
204
struct OpCmpNE
205
{
206
typedef T type;
207
208
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
209
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
210
{
211
v_dst = internal::vmvnq(internal::vceqq(v_src0, v_src1));
212
}
213
214
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
215
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
216
{
217
v_dst = internal::vmvn(internal::vceq(v_src0, v_src1));
218
}
219
220
void operator() (const T * src0, const T * src1, u8 * dst) const
221
{
222
dst[0] = src0[0] == src1[0] ? 0 : 255;
223
}
224
};
225
226
template<typename T>
227
struct OpCmpGT
228
{
229
typedef T type;
230
231
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
232
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
233
{
234
v_dst = internal::vcgtq(v_src0, v_src1);
235
}
236
237
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
238
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
239
{
240
v_dst = internal::vcgt(v_src0, v_src1);
241
}
242
243
void operator() (const T * src0, const T * src1, u8 * dst) const
244
{
245
dst[0] = src0[0] > src1[0] ? 255 : 0;
246
}
247
};
248
249
template<typename T>
250
struct OpCmpGE
251
{
252
typedef T type;
253
254
void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
255
typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
256
{
257
v_dst = internal::vcgeq(v_src0, v_src1);
258
}
259
260
void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
261
typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
262
{
263
v_dst = internal::vcge(v_src0, v_src1);
264
}
265
266
void operator() (const T * src0, const T * src1, u8 * dst) const
267
{
268
dst[0] = src0[0] >= src1[0] ? 255 : 0;
269
}
270
};
271
272
}
273
274
/*
 * NEON build: defines the function cmp<op>() for element type `type`.
 * After the configuration check, every argument is forwarded to vcompare()
 * together with a temporary OpCmp<op><type> functor.
 */
#define IMPL_CMPOP(op, type)                                          \
void cmp##op(const Size2D &size,                                      \
             const type * src0Base, ptrdiff_t src0Stride,             \
             const type * src1Base, ptrdiff_t src1Stride,             \
             u8 *dstBase, ptrdiff_t dstStride)                        \
{                                                                     \
    internal::assertSupportedConfiguration();                         \
    vcompare(size, src0Base, src0Stride, src1Base, src1Stride,        \
             dstBase, dstStride, OpCmp##op<type>());                  \
}
287
288
#else
289
290
/*
 * Build without CAROTENE_NEON: defines cmp<op>() for element type `type` with
 * the same signature as the NEON variant. The body only calls
 * internal::assertSupportedConfiguration(); the arguments are cast to void to
 * mark them as intentionally unused.
 */
#define IMPL_CMPOP(op, type)                                          \
void cmp##op(const Size2D &size,                                      \
             const type * src0Base, ptrdiff_t src0Stride,             \
             const type * src1Base, ptrdiff_t src1Stride,             \
             u8 *dstBase, ptrdiff_t dstStride)                        \
{                                                                     \
    internal::assertSupportedConfiguration();                         \
    (void)size;                                                       \
    (void)src0Base; (void)src0Stride;                                 \
    (void)src1Base; (void)src1Stride;                                 \
    (void)dstBase;  (void)dstStride;                                  \
}
305
306
#endif
307
308
// cmpEQ for every supported element type.
IMPL_CMPOP(EQ, u8)
IMPL_CMPOP(EQ, s8)
IMPL_CMPOP(EQ, u16)
IMPL_CMPOP(EQ, s16)
IMPL_CMPOP(EQ, u32)
IMPL_CMPOP(EQ, s32)
IMPL_CMPOP(EQ, f32)

// cmpNE for every supported element type.
IMPL_CMPOP(NE, u8)
IMPL_CMPOP(NE, s8)
IMPL_CMPOP(NE, u16)
IMPL_CMPOP(NE, s16)
IMPL_CMPOP(NE, u32)
IMPL_CMPOP(NE, s32)
IMPL_CMPOP(NE, f32)

// cmpGT for every supported element type.
IMPL_CMPOP(GT, u8)
IMPL_CMPOP(GT, s8)
IMPL_CMPOP(GT, u16)
IMPL_CMPOP(GT, s16)
IMPL_CMPOP(GT, u32)
IMPL_CMPOP(GT, s32)
IMPL_CMPOP(GT, f32)

// cmpGE for every supported element type.
IMPL_CMPOP(GE, u8)
IMPL_CMPOP(GE, s8)
IMPL_CMPOP(GE, u16)
IMPL_CMPOP(GE, s16)
IMPL_CMPOP(GE, u32)
IMPL_CMPOP(GE, s32)
IMPL_CMPOP(GE, f32)
339
340
} // namespace CAROTENE_NS
341
342