// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
//
// Copy retrieved from GitHub repository Tetragramm/opencv,
// path blob/master/modules/core/src/count_non_zero.cpp

#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "stat.hpp"

namespace cv {

// Per-element-type helpers; each counts the non-zero elements of one contiguous buffer.
// Generic scalar counter: returns how many of the `len` elements starting at
// `src` compare unequal to zero.
//
// src - pointer to the first element of a contiguous buffer
// len - number of elements to inspect; when len <= 0 no element is read and 0 is returned
template<typename T>
static int countNonZero_(const T* src, int len )
{
    int i=0, nz = 0;
#if CV_ENABLE_UNROLLED
    // Manually unrolled main loop: four elements per iteration.
    for(; i <= len - 4; i += 4 )
        nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
#endif
    // Remaining elements (the whole buffer when the unrolled loop is compiled out).
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}

// Counts the non-zero bytes among the `len` bytes starting at `src`.
//
// When CV_SIMD is enabled, the leading part of the buffer (a multiple of the
// vector width) is processed with universal intrinsics: zeros are counted in
// 8-bit lanes, widened into 16-bit and then 32-bit lanes, and the non-zero
// count is obtained as (elements processed) - (zeros found). Whatever is left
// is handled by the scalar loop at the end.
static int countNonZero8u( const uchar* src, int len )
{
    int i=0, nz = 0;
#if CV_SIMD
    // Round len down to a multiple of the vector width
    // (assumes nlanes is a power of two).
    int len0 = len & -v_uint8::nlanes;
    v_uint8 v_zero = vx_setzero_u8();
    v_uint8 v_one = vx_setall_u8(1);

    v_uint32 v_sum32 = vx_setzero_u32();
    while (i < len0)
    {
        v_uint16 v_sum16 = vx_setzero_u16();
        int j = i;
        // The span per pass is capped so the 16-bit counters are widened
        // into v_sum32 before they can overflow.
        while (j < std::min(len0, i + 65280 * v_uint16::nlanes))
        {
            v_uint8 v_sum8 = vx_setzero_u8();
            int k = j;
            // At most 255 iterations, each adding 0 or 1 per lane, so the
            // 8-bit counters cannot wrap.
            for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes)
                v_sum8 += v_one & (vx_load(src + k) == v_zero); // +1 in every lane that holds a zero
            v_uint16 part1, part2;
            v_expand(v_sum8, part1, part2);
            v_sum16 += part1 + part2;
            j = k;
        }
        v_uint32 part1, part2;
        v_expand(v_sum16, part1, part2);
        v_sum32 += part1 + part2;
        i = j;
    }
    // v_sum32 accumulated zeros, so non-zeros = processed elements - zeros.
    nz = i - v_reduce_sum(v_sum32);
    v_cleanup();
#endif
    // Scalar tail (or the whole buffer when CV_SIMD is disabled).
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}

static int countNonZero16u( const ushort* src, int len )
63
{
64
int i = 0, nz = 0;
65
#if CV_SIMD
66
int len0 = len & -v_int8::nlanes;
67
v_uint16 v_zero = vx_setzero_u16();
68
v_int8 v_one = vx_setall_s8(1);
69
70
v_int32 v_sum32 = vx_setzero_s32();
71
while (i < len0)
72
{
73
v_int16 v_sum16 = vx_setzero_s16();
74
int j = i;
75
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
76
{
77
v_int8 v_sum8 = vx_setzero_s8();
78
int k = j;
79
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
80
v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero));
81
v_int16 part1, part2;
82
v_expand(v_sum8, part1, part2);
83
v_sum16 += part1 + part2;
84
j = k;
85
}
86
v_int32 part1, part2;
87
v_expand(v_sum16, part1, part2);
88
v_sum32 += part1 + part2;
89
i = j;
90
}
91
nz = i - v_reduce_sum(v_sum32);
92
v_cleanup();
93
#endif
94
return nz + countNonZero_(src + i, len - i);
95
}
96
97
static int countNonZero32s( const int* src, int len )
98
{
99
int i = 0, nz = 0;
100
#if CV_SIMD
101
int len0 = len & -v_int8::nlanes;
102
v_int32 v_zero = vx_setzero_s32();
103
v_int8 v_one = vx_setall_s8(1);
104
105
v_int32 v_sum32 = vx_setzero_s32();
106
while (i < len0)
107
{
108
v_int16 v_sum16 = vx_setzero_s16();
109
int j = i;
110
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
111
{
112
v_int8 v_sum8 = vx_setzero_s8();
113
int k = j;
114
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
115
v_sum8 += v_one & v_pack(
116
v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero),
117
v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero)
118
);
119
v_int16 part1, part2;
120
v_expand(v_sum8, part1, part2);
121
v_sum16 += part1 + part2;
122
j = k;
123
}
124
v_int32 part1, part2;
125
v_expand(v_sum16, part1, part2);
126
v_sum32 += part1 + part2;
127
i = j;
128
}
129
nz = i - v_reduce_sum(v_sum32);
130
v_cleanup();
131
#endif
132
return nz + countNonZero_(src + i, len - i);
133
}
134
135
static int countNonZero32f( const float* src, int len )
136
{
137
int i = 0, nz = 0;
138
#if CV_SIMD
139
int len0 = len & -v_int8::nlanes;
140
v_float32 v_zero = vx_setzero_f32();
141
v_int8 v_one = vx_setall_s8(1);
142
143
v_int32 v_sum32 = vx_setzero_s32();
144
while (i < len0)
145
{
146
v_int16 v_sum16 = vx_setzero_s16();
147
int j = i;
148
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
149
{
150
v_int8 v_sum8 = vx_setzero_s8();
151
int k = j;
152
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
153
v_sum8 += v_one & v_pack(
154
v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)),
155
v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero))
156
);
157
v_int16 part1, part2;
158
v_expand(v_sum8, part1, part2);
159
v_sum16 += part1 + part2;
160
j = k;
161
}
162
v_int32 part1, part2;
163
v_expand(v_sum16, part1, part2);
164
v_sum32 += part1 + part2;
165
i = j;
166
}
167
nz = i - v_reduce_sum(v_sum32);
168
v_cleanup();
169
#endif
170
return nz + countNonZero_(src + i, len - i);
171
}
172
173
static int countNonZero64f( const double* src, int len )
174
{
175
return countNonZero_(src, len);
176
}
177
178
typedef int (*CountNonZeroFunc)(const uchar*, int);
179
180
static CountNonZeroFunc getCountNonZeroTab(int depth)
181
{
182
static CountNonZeroFunc countNonZeroTab[] =
183
{
184
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
185
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
186
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
187
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
188
};
189
190
return countNonZeroTab[depth];
191
}
192
193
194
#ifdef HAVE_OPENCL
// OpenCL path for countNonZero.
//
// Builds the "reduce" kernel with OP_COUNT_NON_ZERO, runs it with one
// work-group per compute unit, and sums the per-group partial results that the
// kernel writes into a 1 x dbsize CV_32SC1 buffer.
//
// _src - input array
// res  - receives the count; written only when the kernel ran successfully
// Returns true on success; false when the device lacks double support for
// CV_64F input, the kernel could not be created, or the run failed.
static bool ocl_countNonZero( InputArray _src, int & res )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)
        return false;

    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    // Half of the smallest power of two that is >= wgs.
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
                         " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                         ocl::typeToStr(depth), (int)wgs, kercn,
                         wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
           dbsize, ocl::KernelArg::PtrWriteOnly(db));

    size_t globalsize = dbsize * wgs;
    if (!k.run(1, &globalsize, &wgs, true))
        return false;

    // Final reduction on the host: add up the partial counts.
    res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]);
    return true;
}
#endif

#if defined HAVE_IPP
// Intel IPP path for countNonZero; handles CV_8U and CV_32F data only.
//
// ippiCountInRange_* is called with both bounds set to 0 and its result is
// subtracted from the element count, i.e. the IPP call is used to count the
// zeros (NOTE(review): inclusive [0, 0] range semantics per the IPP
// documentation - confirm).
//
// src - input matrix
// res - receives the non-zero count. In the multi-dimensional branch it is
//       reset to 0 and accumulated per plane, so it may hold a partial value
//       when false is returned.
// Returns true when the count was produced; false for other depths, on an IPP
// error status, or (with IPP older than 2018.0.1) when the top CPU feature
// set is SSE4.2.
static bool ipp_countNonZero( Mat &src, int &res )
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 < 201801
    // Poor performance of SSE42
    if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
        return false;
#endif

    Ipp32s count = 0;
    int depth = src.depth();

    if(src.dims <= 2)
    {
        // Whole matrix in a single call, using the matrix row step.
        IppStatus status;
        IppiSize size = {src.cols*src.channels(), src.rows};

        if(depth == CV_8U)
            status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0);
        else if(depth == CV_32F)
            status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0);
        else
            return false;

        if(status < 0)
            return false;

        res = size.width*size.height - count;
    }
    else
    {
        // More than two dimensions: walk the planes produced by
        // NAryMatIterator and treat each one as a single row.
        IppStatus status;
        const Mat *arrays[] = {&src, NULL};
        Mat planes[1];
        NAryMatIterator it(arrays, planes, 1);
        IppiSize size = {(int)it.size*src.channels(), 1};
        res = 0;
        for (size_t i = 0; i < it.nplanes; i++, ++it)
        {
            if(depth == CV_8U)
                status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, it.planes->ptr<Ipp8u>(), (int)it.planes->step, size, &count, 0, 0);
            else if(depth == CV_32F)
                status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, it.planes->ptr<Ipp32f>(), (int)it.planes->step, size, &count, 0, 0);
            else
                return false;

            // Sanity check: IPP cannot report more matches than the plane has elements.
            if(status < 0 || (int)it.planes->total()*src.channels() < count)
                return false;

            res += (int)it.planes->total()*src.channels() - count;
        }
    }

    return true;
}
#endif

} // cv::

// Definitions of the cv::countNonZero and cv::findNonZero functions.
// Returns the number of non-zero elements of a single-channel array.
//
// _src - input array; asserted to have exactly one channel
//
// Order of attempts: the OpenCL implementation (for UMat inputs with at most
// two dimensions, when built with HAVE_OPENCL), then the IPP implementation
// (when built with HAVE_IPP), then the per-depth routine from
// getCountNonZeroTab applied to every plane produced by NAryMatIterator.
// Asserts when the table has no routine for the array's depth.
int cv::countNonZero( InputArray _src )
{
    CV_INSTRUMENT_REGION();

    int type = _src.type(), cn = CV_MAT_CN(type);
    CV_Assert( cn == 1 );

#if defined HAVE_OPENCL || defined HAVE_IPP
    // Output slot for the accelerated paths below.
    int res = -1;
#endif

#ifdef HAVE_OPENCL
    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_countNonZero(_src, res),
                res)
#endif

    Mat src = _src.getMat();
    CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);

    CountNonZeroFunc func = getCountNonZeroTab(src.depth());
    CV_Assert( func != 0 );

    // Iterate over the planes of the matrix; ptrs[0] is updated by the
    // iterator to point at the current plane.
    const Mat* arrays[] = {&src, 0};
    uchar* ptrs[1] = {};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, nz = 0;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        nz += func( ptrs[0], total );

    return nz;
}

// Collects the positions of all non-zero elements of a single-channel 2D matrix.
//
// _src - input matrix; asserted to be single-channel, two-dimensional and of
//        depth CV_8U..CV_64F
// _idx - output; receives the positions as Point(column, row), ordered by row
//        and then by column. It is released when nothing was found, or when
//        it is a non-continuous Mat (before being overwritten).
void cv::findNonZero( InputArray _src, OutputArray _idx )
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat();
    CV_Assert( src.channels() == 1 && src.dims == 2 );

    int depth = src.depth();
    // The row scanners below only know the element types up to CV_64F. Reject
    // anything else instead of letting it fall into the final `else` branch,
    // where it would be reinterpreted as double. countNonZero() rejects such
    // depths as well (null entry in its function table).
    CV_Assert( depth >= CV_8U && depth <= CV_64F );

    std::vector<Point> idxvec;
    int rows = src.rows, cols = src.cols;
    // Per-row scratch buffer holding the column indices of non-zero elements.
    AutoBuffer<int> buf_(cols + 1);
    int* buf = buf_.data();

    for( int i = 0; i < rows; i++ )
    {
        int j, k = 0; // k = number of non-zero elements found in this row
        const uchar* ptr8 = src.ptr(i);
        if( depth == CV_8U || depth == CV_8S )
        {
            for( j = 0; j < cols; j++ )
                if( ptr8[j] != 0 ) buf[k++] = j;
        }
        else if( depth == CV_16U || depth == CV_16S )
        {
            const ushort* ptr16 = (const ushort*)ptr8;
            for( j = 0; j < cols; j++ )
                if( ptr16[j] != 0 ) buf[k++] = j;
        }
        else if( depth == CV_32S )
        {
            const int* ptr32s = (const int*)ptr8;
            for( j = 0; j < cols; j++ )
                if( ptr32s[j] != 0 ) buf[k++] = j;
        }
        else if( depth == CV_32F )
        {
            const float* ptr32f = (const float*)ptr8;
            for( j = 0; j < cols; j++ )
                if( ptr32f[j] != 0 ) buf[k++] = j;
        }
        else
        {
            // Only CV_64F can reach this branch because of the depth check above.
            const double* ptr64f = (const double*)ptr8;
            for( j = 0; j < cols; j++ )
                if( ptr64f[j] != 0 ) buf[k++] = j;
        }

        // Append this row's hits to the result as (x = column, y = row).
        if( k > 0 )
        {
            size_t sz = idxvec.size();
            idxvec.resize(sz + k);
            for( j = 0; j < k; j++ )
                idxvec[sz + j] = Point(buf[j], i);
        }
    }

    if( idxvec.empty() || (_idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous()) )
        _idx.release();

    if( !idxvec.empty() )
        Mat(idxvec).copyTo(_idx);
}