Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/in_range.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
42
#include "vtransform.hpp"
43
44
namespace CAROTENE_NS {
45
46
#ifdef CAROTENE_NEON
47
48
namespace {
49
50
inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); }
51
inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); }
52
inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); }
53
54
template <typename T, int elsize> struct vtail
55
{
56
static inline void inRange(const T *, const T *, const T *,
57
u8 *, size_t &, size_t)
58
{
59
//do nothing since there couldn't be enough data
60
}
61
};
62
template <typename T> struct vtail<T, 2>
63
{
64
static inline void inRange(const T * src, const T * rng1, const T * rng2,
65
u8 * dst, size_t &x, size_t width)
66
{
67
typedef typename internal::VecTraits<T>::vec128 vec128;
68
typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
69
//There no more than 15 elements in the tail, so we could handle 8 element vector only once
70
if( x + 8 < width)
71
{
72
vec128 vs = internal::vld1q( src + x);
73
vec128 vr1 = internal::vld1q(rng1 + x);
74
vec128 vr2 = internal::vld1q(rng2 + x);
75
uvec128 vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
76
internal::vst1(dst + x, internal::vmovn(vd));
77
x+=8;
78
}
79
}
80
};
81
template <typename T> struct vtail<T, 1>
82
{
83
static inline void inRange(const T * src, const T * rng1, const T * rng2,
84
u8 * dst, size_t &x, size_t width)
85
{
86
typedef typename internal::VecTraits<T>::vec128 vec128;
87
typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
88
typedef typename internal::VecTraits<T>::vec64 vec64;
89
typedef typename internal::VecTraits<T>::unsign::vec64 uvec64;
90
//There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
91
if( x + 16 < width)
92
{
93
vec128 vs = internal::vld1q( src + x);
94
vec128 vr1 = internal::vld1q(rng1 + x);
95
vec128 vr2 = internal::vld1q(rng2 + x);
96
uvec128 vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
97
internal::vst1q(dst + x, vd);
98
x+=16;
99
}
100
if( x + 8 < width)
101
{
102
vec64 vs = internal::vld1( src + x);
103
vec64 vr1 = internal::vld1(rng1 + x);
104
vec64 vr2 = internal::vld1(rng2 + x);
105
uvec64 vd = internal::vand(internal::vcge(vs, vr1), internal::vcge(vr2, vs));
106
internal::vst1(dst + x, vd);
107
x+=8;
108
}
109
}
110
};
111
112
template <typename T>
113
inline void inRangeCheck(const Size2D &_size,
114
const T * srcBase, ptrdiff_t srcStride,
115
const T * rng1Base, ptrdiff_t rng1Stride,
116
const T * rng2Base, ptrdiff_t rng2Stride,
117
u8 * dstBase, ptrdiff_t dstStride)
118
{
119
typedef typename internal::VecTraits<T>::vec128 vec128;
120
typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
121
122
Size2D size(_size);
123
if (srcStride == dstStride &&
124
srcStride == rng1Stride &&
125
srcStride == rng2Stride &&
126
srcStride == (ptrdiff_t)(size.width))
127
{
128
size.width *= size.height;
129
size.height = 1;
130
}
131
const size_t width = size.width & ~( 32/sizeof(T) - 1 );
132
133
for(size_t j = 0; j < size.height; ++j)
134
{
135
const T * src = internal::getRowPtr( srcBase, srcStride, j);
136
const T * rng1 = internal::getRowPtr(rng1Base, rng1Stride, j);
137
const T * rng2 = internal::getRowPtr(rng2Base, rng2Stride, j);
138
u8 * dst = internal::getRowPtr( dstBase, dstStride, j);
139
size_t i = 0;
140
for( ; i < width; i += 32/sizeof(T) )
141
{
142
internal::prefetch(src + i);
143
internal::prefetch(rng1 + i);
144
internal::prefetch(rng2 + i);
145
146
vec128 vs = internal::vld1q( src + i);
147
vec128 vr1 = internal::vld1q(rng1 + i);
148
vec128 vr2 = internal::vld1q(rng2 + i);
149
uvec128 vd1 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
150
vs = internal::vld1q( src + i + 16/sizeof(T));
151
vr1 = internal::vld1q(rng1 + i + 16/sizeof(T));
152
vr2 = internal::vld1q(rng2 + i + 16/sizeof(T));
153
uvec128 vd2 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
154
vnst(dst + i, vd1, vd2);
155
}
156
vtail<T, sizeof(T)>::inRange(src, rng1, rng2, dst, i, size.width);
157
for( ; i < size.width; i++ )
158
dst[i] = (u8)(-(rng1[i] <= src[i] && src[i] <= rng2[i]));
159
}
160
}
161
162
}
163
164
// NEON build: defines the inRange() overload for element type T. The overload
// calls internal::assertSupportedConfiguration() and then forwards all of its
// arguments, unchanged, to the templated inRangeCheck().
#define INRANGEFUNC(T)                                              \
void inRange(const Size2D &_size,                                   \
             const T * srcBase, ptrdiff_t srcStride,                \
             const T * rng1Base, ptrdiff_t rng1Stride,              \
             const T * rng2Base, ptrdiff_t rng2Stride,              \
             u8 * dstBase, ptrdiff_t dstStride)                     \
{                                                                   \
    internal::assertSupportedConfiguration();                       \
    inRangeCheck<T>(_size,                                          \
                    srcBase, srcStride,                             \
                    rng1Base, rng1Stride,                           \
                    rng2Base, rng2Stride,                           \
                    dstBase, dstStride);                            \
}
176
#else
177
// Build without NEON: defines an inRange() overload for element type T with
// the same signature as the NEON variant. Its arguments are unnamed and unused;
// the body only calls internal::assertSupportedConfiguration().
#define INRANGEFUNC(T)                                              \
void inRange(const Size2D &,                                        \
             const T *, ptrdiff_t,                                  \
             const T *, ptrdiff_t,                                  \
             const T *, ptrdiff_t,                                  \
             u8 *, ptrdiff_t)                                       \
{                                                                   \
    internal::assertSupportedConfiguration();                       \
}
186
#endif
187
188
// Emit one inRange() overload per supported element type.

// 1-byte elements
INRANGEFUNC(u8)
INRANGEFUNC(s8)

// 2-byte elements
INRANGEFUNC(u16)
INRANGEFUNC(s16)

// 4-byte elements
INRANGEFUNC(s32)
INRANGEFUNC(f32)
194
195
} // namespace CAROTENE_NS
196
197