CoCalc -- count_non_zero.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/count_non_zero.cpp
16337 views
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html

5

6
#include "precomp.hpp"
7
#include "opencl_kernels_core.hpp"
8
#include "stat.hpp"
9

10
namespace cv {
11

12
/**
 * Scalar reference implementation: counts the elements of `src` that compare
 * unequal to zero.
 *
 * @param src  pointer to `len` contiguous elements of type T
 * @param len  number of elements to inspect; values <= 0 yield 0
 * @return     number of elements for which `src[i] != 0` holds
 *
 * For floating-point T the comparison is the ordinary `!=`, so -0.0 is treated
 * as zero and NaN is counted as non-zero.
 */
template<typename T>
static int countNonZero_(const T* src, int len )
{
    int i=0, nz = 0;
    #if CV_ENABLE_UNROLLED
    // Manually unrolled by four; the loop below picks up the remaining 0..3 elements.
    for(; i <= len - 4; i += 4 )
        nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
    #endif
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}
24

25
/**
 * Counts the non-zero bytes in `src[0..len)`.
 *
 * @param src  pointer to `len` contiguous 8-bit elements
 * @param len  number of elements
 * @return     number of elements that are not equal to zero
 *
 * When CV_SIMD is enabled the vector part actually counts the ZERO elements and
 * the result is obtained as (elements processed) - (zeros found). Zeros are
 * accumulated in three tiers (8-bit -> 16-bit -> 32-bit lanes) so that the hot
 * inner loop only needs one compare, one AND and one 8-bit add per vector.
 */
static int countNonZero8u( const uchar* src, int len )
{
    int i=0, nz = 0;
#if CV_SIMD
    // Largest multiple of the vector width not exceeding len (nlanes is a power of two).
    int len0 = len & -v_uint8::nlanes;
    v_uint8 v_zero = vx_setzero_u8();
    v_uint8 v_one = vx_setall_u8(1);

    v_uint32 v_sum32 = vx_setzero_u32();
    while (i < len0)
    {
        v_uint16 v_sum16 = vx_setzero_u16();
        int j = i;
        // Each inner block adds at most 2*255 = 510 to a 16-bit lane (two 8-bit halves
        // are summed). Assuming v_uint8::nlanes == 2*v_uint16::nlanes, the bound below
        // allows 128 blocks, i.e. at most 128*510 = 65280 per lane, which fits in 16 bits.
        while (j < std::min(len0, i + 65280 * v_uint16::nlanes))
        {
            v_uint8 v_sum8 = vx_setzero_u8();
            int k = j;
            // At most 255 vector steps, each adding 0 or 1 per lane -> no 8-bit overflow.
            for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes)
                v_sum8 += v_one & (vx_load(src + k) == v_zero); // compare mask reduced to 0/1
            v_uint16 part1, part2;
            v_expand(v_sum8, part1, part2);
            v_sum16 += part1 + part2;
            j = k;
        }
        v_uint32 part1, part2;
        v_expand(v_sum16, part1, part2);
        v_sum32 += part1 + part2;
        i = j;
    }
    // v_sum32 holds the number of zeros among the first i elements.
    nz = i - v_reduce_sum(v_sum32);
    v_cleanup();
#endif
    // Scalar tail (or the whole array when CV_SIMD is disabled).
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}
61

62
static int countNonZero16u( const ushort* src, int len )
63
{
64
    int i = 0, nz = 0;
65
#if CV_SIMD
66
    int len0 = len & -v_int8::nlanes;
67
    v_uint16 v_zero = vx_setzero_u16();
68
    v_int8 v_one = vx_setall_s8(1);
69

70
    v_int32 v_sum32 = vx_setzero_s32();
71
    while (i < len0)
72
    {
73
        v_int16 v_sum16 = vx_setzero_s16();
74
        int j = i;
75
        while (j < std::min(len0, i + 32766 * v_int16::nlanes))
76
        {
77
            v_int8 v_sum8 = vx_setzero_s8();
78
            int k = j;
79
            for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
80
                v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero));
81
            v_int16 part1, part2;
82
            v_expand(v_sum8, part1, part2);
83
            v_sum16 += part1 + part2;
84
            j = k;
85
        }
86
        v_int32 part1, part2;
87
        v_expand(v_sum16, part1, part2);
88
        v_sum32 += part1 + part2;
89
        i = j;
90
    }
91
    nz = i - v_reduce_sum(v_sum32);
92
    v_cleanup();
93
#endif
94
    return nz + countNonZero_(src + i, len - i);
95
}
96

97
static int countNonZero32s( const int* src, int len )
98
{
99
    int i = 0, nz = 0;
100
#if CV_SIMD
101
    int len0 = len & -v_int8::nlanes;
102
    v_int32 v_zero = vx_setzero_s32();
103
    v_int8 v_one = vx_setall_s8(1);
104

105
    v_int32 v_sum32 = vx_setzero_s32();
106
    while (i < len0)
107
    {
108
        v_int16 v_sum16 = vx_setzero_s16();
109
        int j = i;
110
        while (j < std::min(len0, i + 32766 * v_int16::nlanes))
111
        {
112
            v_int8 v_sum8 = vx_setzero_s8();
113
            int k = j;
114
            for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
115
                v_sum8 += v_one & v_pack(
116
                    v_pack(vx_load(src + k                    ) == v_zero, vx_load(src + k +   v_int32::nlanes) == v_zero),
117
                    v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero)
118
                );
119
            v_int16 part1, part2;
120
            v_expand(v_sum8, part1, part2);
121
            v_sum16 += part1 + part2;
122
            j = k;
123
        }
124
        v_int32 part1, part2;
125
        v_expand(v_sum16, part1, part2);
126
        v_sum32 += part1 + part2;
127
        i = j;
128
    }
129
    nz = i - v_reduce_sum(v_sum32);
130
    v_cleanup();
131
#endif
132
    return nz + countNonZero_(src + i, len - i);
133
}
134

135
static int countNonZero32f( const float* src, int len )
136
{
137
    int i = 0, nz = 0;
138
#if CV_SIMD
139
    int len0 = len & -v_int8::nlanes;
140
    v_float32 v_zero = vx_setzero_f32();
141
    v_int8 v_one = vx_setall_s8(1);
142

143
    v_int32 v_sum32 = vx_setzero_s32();
144
    while (i < len0)
145
    {
146
        v_int16 v_sum16 = vx_setzero_s16();
147
        int j = i;
148
        while (j < std::min(len0, i + 32766 * v_int16::nlanes))
149
        {
150
            v_int8 v_sum8 = vx_setzero_s8();
151
            int k = j;
152
            for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
153
                v_sum8 += v_one & v_pack(
154
                    v_pack(v_reinterpret_as_s32(vx_load(src + k                      ) == v_zero), v_reinterpret_as_s32(vx_load(src + k +   v_float32::nlanes) == v_zero)),
155
                    v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero))
156
                );
157
            v_int16 part1, part2;
158
            v_expand(v_sum8, part1, part2);
159
            v_sum16 += part1 + part2;
160
            j = k;
161
        }
162
        v_int32 part1, part2;
163
        v_expand(v_sum16, part1, part2);
164
        v_sum32 += part1 + part2;
165
        i = j;
166
    }
167
    nz = i - v_reduce_sum(v_sum32);
168
    v_cleanup();
169
#endif
170
    return nz + countNonZero_(src + i, len - i);
171
}
172

173
static int countNonZero64f( const double* src, int len )
174
{
175
    return countNonZero_(src, len);
176
}
177

178
typedef int (*CountNonZeroFunc)(const uchar*, int);
179

180
static CountNonZeroFunc getCountNonZeroTab(int depth)
181
{
182
    static CountNonZeroFunc countNonZeroTab[] =
183
    {
184
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
185
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
186
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
187
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
188
    };
189

190
    return countNonZeroTab[depth];
191
}
192

193

194
#ifdef HAVE_OPENCL
/**
 * OpenCL path for countNonZero().
 *
 * Builds the "reduce" kernel with OP_COUNT_NON_ZERO, lets every work group write
 * one partial count into a 1 x dbsize CV_32SC1 buffer, and sums that buffer on
 * the host.
 *
 * @param _src  input array
 * @param res   receives the non-zero count; written only when true is returned
 * @return      true on success; false if the device lacks double support for
 *              CV_64F input, the kernel could not be built, or the run failed
 */
static bool ocl_countNonZero( InputArray _src, int & res )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)
        return false;

    // One partial result per compute unit.
    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    // Smallest power of two >= wgs, then halved; passed to the kernel as WGS2_ALIGNED.
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
                         " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                         ocl::typeToStr(depth), (int)wgs, kercn,
                         wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
           dbsize, ocl::KernelArg::PtrWriteOnly(db));

    size_t globalsize = dbsize * wgs;
    if (!k.run(1, &globalsize, &wgs, true))
        return false;

    // Reduce the per-group partial counts on the host.
    res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]);
    return true;
}
#endif
231

232
#if defined HAVE_IPP
/**
 * Intel IPP path for countNonZero().
 *
 * Only CV_8U and CV_32F data are handled. ippiCountInRange is called with both
 * bounds set to 0, and the returned count is subtracted from the total number
 * of elements, i.e. the IPP call is used to count the zeros.
 *
 * @param src  input matrix
 * @param res  receives the non-zero count; only meaningful when true is returned
 *             (in the N-dimensional branch it may already have been modified
 *             when false is returned)
 * @return     true on success; false for unsupported depths, on an IPP error
 *             status, or (IPP < 2018.0.1 only) when the top CPU feature is SSE4.2
 */
static bool ipp_countNonZero( Mat &src, int &res )
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 < 201801
    // Poor performance of SSE42
    if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
        return false;
#endif

    Ipp32s  count = 0;
    int     depth = src.depth();

    if(src.dims <= 2)
    {
        // 2-D case: a single call over the whole matrix, honouring the row step.
        IppStatus status;
        IppiSize  size = {src.cols*src.channels(), src.rows};

        if(depth == CV_8U)
            status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0);
        else if(depth == CV_32F)
            status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0);
        else
            return false;

        if(status < 0)
            return false;

        res = size.width*size.height - count;
    }
    else
    {
        // N-D case: walk the planes and treat each one as a single row.
        IppStatus       status;
        const Mat      *arrays[] = {&src, NULL};
        Mat            planes[1];
        NAryMatIterator it(arrays, planes, 1);
        IppiSize        size  = {(int)it.size*src.channels(), 1};
        res = 0;
        for (size_t i = 0; i < it.nplanes; i++, ++it)
        {
            if(depth == CV_8U)
                status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, it.planes->ptr<Ipp8u>(), (int)it.planes->step, size, &count, 0, 0);
            else if(depth == CV_32F)
                status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, it.planes->ptr<Ipp32f>(), (int)it.planes->step, size, &count, 0, 0);
            else
                return false;

            // Also reject a count larger than the plane, which would make the difference negative.
            if(status < 0 || (int)it.planes->total()*src.channels() < count)
                return false;

            res += (int)it.planes->total()*src.channels() - count;
        }
    }

    return true;
}
#endif
290

291
} // cv::
292

293
/**
 * Counts the non-zero elements of a single-channel array.
 *
 * The OpenCL implementation (for UMat input with at most two dimensions) and
 * the IPP implementation are tried first when they are compiled in; otherwise
 * the per-depth CPU kernel from getCountNonZeroTab() is applied to every plane
 * produced by NAryMatIterator and the partial counts are summed.
 *
 * Asserts that the input has exactly one channel and that a kernel exists for
 * its depth.
 *
 * @param _src  single-channel input array
 * @return      number of non-zero elements
 */
int cv::countNonZero( InputArray _src )
{
    CV_INSTRUMENT_REGION();

    int type = _src.type(), cn = CV_MAT_CN(type);
    CV_Assert( cn == 1 );

#if defined HAVE_OPENCL || defined HAVE_IPP
    // Filled in by whichever accelerated path succeeds.
    int res = -1;
#endif

#ifdef HAVE_OPENCL
    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_countNonZero(_src, res),
                res)
#endif

    Mat src = _src.getMat();
    CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);

    CountNonZeroFunc func = getCountNonZeroTab(src.depth());
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, 0};
    uchar* ptrs[1] = {};
    NAryMatIterator it(arrays, ptrs);
    // Number of elements per plane handed to the kernel.
    int total = (int)it.size, nz = 0;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        nz += func( ptrs[0], total );

    return nz;
}
326

327
// Scans one row of `cols` elements and stores the column index of every
// non-zero element into `colsOut` (which must have room for `cols` entries).
// Returns the number of indices written.
template<typename T>
static int findNonZeroRow_( const T* row, int cols, int* colsOut )
{
    int k = 0;
    for( int j = 0; j < cols; j++ )
        if( row[j] != 0 ) colsOut[k++] = j;
    return k;
}

/**
 * Collects the locations of all non-zero elements of a single-channel 2-D
 * matrix.
 *
 * Locations are emitted as Point(column, row), in row-major scan order.
 * If nothing is found, `_idx` is released; otherwise the points are copied
 * into `_idx`.
 *
 * Asserts that the input is single-channel, two-dimensional and of depth
 * CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F or CV_64F. Any other depth used
 * to be reinterpreted as double, which read past the end of each row; it is
 * now rejected up front.
 *
 * @param _src  single-channel 2-D input matrix
 * @param _idx  output array receiving the Point locations
 */
void cv::findNonZero( InputArray _src, OutputArray _idx )
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat();
    CV_Assert( src.channels() == 1 && src.dims == 2 );

    int depth = src.depth();
    CV_Assert( depth == CV_8U  || depth == CV_8S  ||
               depth == CV_16U || depth == CV_16S ||
               depth == CV_32S || depth == CV_32F || depth == CV_64F );

    std::vector<Point> idxvec;
    int rows = src.rows, cols = src.cols;
    // Scratch space for the column indices found in the current row.
    AutoBuffer<int> buf_(cols + 1);
    int* buf = buf_.data();

    for( int i = 0; i < rows; i++ )
    {
        int k = 0;
        const uchar* ptr8 = src.ptr(i);

        // Only equality with zero matters, so signed depths reuse the unsigned
        // view of the same width.
        if( depth == CV_8U || depth == CV_8S )
            k = findNonZeroRow_(ptr8, cols, buf);
        else if( depth == CV_16U || depth == CV_16S )
            k = findNonZeroRow_((const ushort*)ptr8, cols, buf);
        else if( depth == CV_32S )
            k = findNonZeroRow_((const int*)ptr8, cols, buf);
        else if( depth == CV_32F )
            k = findNonZeroRow_((const float*)ptr8, cols, buf);
        else // CV_64F, guaranteed by the depth assertion above
            k = findNonZeroRow_((const double*)ptr8, cols, buf);

        if( k > 0 )
        {
            // Grow once per row, then fill in the new points.
            size_t sz = idxvec.size();
            idxvec.resize(sz + k);
            for( int j = 0; j < k; j++ )
                idxvec[sz + j] = Point(buf[j], i);
        }
    }

    if( idxvec.empty() || (_idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous()) )
        _idx.release();

    if( !idxvec.empty() )
        Mat(idxvec).copyTo(_idx);
}
389

390
Product

Resources

Company