CoCalc -- lut.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/lut.cpp
¹⁶³³⁷ views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html
4

5

6
#include "precomp.hpp"
7
#include "opencl_kernels_core.hpp"
8
#include "convert.hpp"
9
#include "opencv2/core/openvx/ovx_defs.hpp"
10

11
/****************************************************************************************\
12
*                                    LUT Transform                                       *
13
\****************************************************************************************/
14

15
namespace cv
16
{
17

18
template<typename T> static void
19
LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
20
{
21
    if( lutcn == 1 )
22
    {
23
        for( int i = 0; i < len*cn; i++ )
24
            dst[i] = lut[src[i]];
25
    }
26
    else
27
    {
28
        for( int i = 0; i < len*cn; i += cn )
29
            for( int k = 0; k < cn; k++ )
30
                dst[i+k] = lut[src[i+k]*cn+k];
31
    }
32
}
33

34
static void LUT8u_8u( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn )
35
{
36
    LUT8u_( src, lut, dst, len, cn, lutcn );
37
}
38

39
static void LUT8u_8s( const uchar* src, const schar* lut, schar* dst, int len, int cn, int lutcn )
40
{
41
    LUT8u_( src, lut, dst, len, cn, lutcn );
42
}
43

44
static void LUT8u_16u( const uchar* src, const ushort* lut, ushort* dst, int len, int cn, int lutcn )
45
{
46
    LUT8u_( src, lut, dst, len, cn, lutcn );
47
}
48

49
static void LUT8u_16s( const uchar* src, const short* lut, short* dst, int len, int cn, int lutcn )
50
{
51
    LUT8u_( src, lut, dst, len, cn, lutcn );
52
}
53

54
static void LUT8u_32s( const uchar* src, const int* lut, int* dst, int len, int cn, int lutcn )
55
{
56
    LUT8u_( src, lut, dst, len, cn, lutcn );
57
}
58

59
static void LUT8u_32f( const uchar* src, const float* lut, float* dst, int len, int cn, int lutcn )
60
{
61
    LUT8u_( src, lut, dst, len, cn, lutcn );
62
}
63

64
static void LUT8u_64f( const uchar* src, const double* lut, double* dst, int len, int cn, int lutcn )
65
{
66
    LUT8u_( src, lut, dst, len, cn, lutcn );
67
}
68

69
typedef void (*LUTFunc)( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn );
70

71
static LUTFunc lutTab[] =
72
{
73
    (LUTFunc)LUT8u_8u, (LUTFunc)LUT8u_8s, (LUTFunc)LUT8u_16u, (LUTFunc)LUT8u_16s,
74
    (LUTFunc)LUT8u_32s, (LUTFunc)LUT8u_32f, (LUTFunc)LUT8u_64f, 0
75
};
76

77
#ifdef HAVE_OPENCL
78

79
// OpenCL path for LUT. Creates _dst with the size of _src, the depth of
// _lut and the channel count of _src, then builds and runs the "LUT" kernel.
// Returns false if the kernel object is empty; otherwise returns the result
// of running it.
static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
{
    const int lcn = _lut.channels();
    const int dcn = _src.channels();
    const int ddepth = _lut.depth();

    UMat src = _src.getUMat();
    UMat lut = _lut.getUMat();
    _dst.create(src.size(), CV_MAKETYPE(ddepth, dcn));
    UMat dst = _dst.getUMat();

    // With a single-channel table the kernel width is the predicted optimal
    // vector width capped at 4; with a per-channel table it is the pixel's
    // channel count.
    int kercn = dcn;
    if (lcn == 1)
        kercn = std::min(4, ocl::predictOptimalVectorWidth(_src, _dst));

    ocl::Kernel k("LUT", ocl::core::lut_oclsrc,
                  format("-D dcn=%d -D lcn=%d -D srcT=%s -D dstT=%s", kercn, lcn,
                         ocl::typeToStr(src.depth()), ocl::memopTypeToStr(ddepth)));
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnlyNoSize(src),
           ocl::KernelArg::ReadOnlyNoSize(lut),
           ocl::KernelArg::WriteOnly(dst, dcn, kercn));

    // The row dimension is divided by 4, rounded up.
    // NOTE(review): presumably each work item handles 4 rows - confirm
    // against the kernel source.
    const size_t workItemsX = (size_t)dst.cols * dcn / kercn;
    const size_t workItemsY = ((size_t)dst.rows + 3) / 4;
    size_t globalSize[2] = { workItemsX, workItemsY };
    return k.run(2, globalSize, NULL, false);
}
100

101
#endif
102

103
#ifdef HAVE_OPENVX
104
// OpenVX path for LUT. Returns false before doing any work unless src, dst
// and _lut all have type CV_8UC1 and _lut is continuous. Otherwise wraps
// src and dst as OpenVX images over their existing data, copies _lut into
// an OpenVX LUT object and runs vxuTableLookup. OpenVX wrapper exceptions
// are routed through VX_DbgThrow; the function returns true afterwards.
static bool openvx_LUT(Mat src, Mat dst, Mat _lut)
{
    const bool supported = src.type() == CV_8UC1 &&
                           dst.type() == src.type() &&
                           _lut.type() == src.type() &&
                           _lut.isContinuous();
    if (!supported)
        return false;

    try
    {
        ivx::Context ctx = ovx::getOpenVXContext();

        // Input image view over src's buffer.
        ivx::Image ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
            ivx::Image::createAddressing(src.cols, src.rows, 1, (vx_int32)(src.step)), src.data);
        // Output image view over dst's buffer.
        ivx::Image ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
            ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data);

        ivx::LUT lut = ivx::LUT::create(ctx);
        lut.copyFrom(_lut);
        ivx::IVX_CHECK_STATUS(vxuTableLookup(ctx, ia, lut, ib));
    }
    catch (ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
    catch (ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }

    return true;
}
134
#endif
135

136
#if defined(HAVE_IPP)
137
#if !IPP_DISABLE_PERF_LUT // there are no performance benefits (PR #2653)
138
namespace ipp {
139

140
class IppLUTParallelBody_LUTC1 : public ParallelLoopBody
141
{
142
public:
143
    bool* ok;
144
    const Mat& src_;
145
    const Mat& lut_;
146
    Mat& dst_;
147

148
    int width;
149
    size_t elemSize1;
150

151
    IppLUTParallelBody_LUTC1(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
152
        : ok(_ok), src_(src), lut_(lut), dst_(dst)
153
    {
154
        width = dst.cols * dst.channels();
155
        elemSize1 = CV_ELEM_SIZE1(dst.depth());
156

157
        CV_DbgAssert(elemSize1 == 1 || elemSize1 == 4);
158
        *ok = true;
159
    }
160

161
    void operator()( const cv::Range& range ) const
162
    {
163
        if (!*ok)
164
            return;
165

166
        const int row0 = range.start;
167
        const int row1 = range.end;
168

169
        Mat src = src_.rowRange(row0, row1);
170
        Mat dst = dst_.rowRange(row0, row1);
171

172
        IppiSize sz = { width, dst.rows };
173

174
        if (elemSize1 == 1)
175
        {
176
            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C1R, (const Ipp8u*)src.data, (int)src.step[0], dst.data, (int)dst.step[0], sz, lut_.data, 8) >= 0)
177
                return;
178
        }
179
        else if (elemSize1 == 4)
180
        {
181
            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u32u_C1R, (const Ipp8u*)src.data, (int)src.step[0], (Ipp32u*)dst.data, (int)dst.step[0], sz, (Ipp32u*)lut_.data, 8) >= 0)
182
                return;
183
        }
184
        *ok = false;
185
    }
186
private:
187
    IppLUTParallelBody_LUTC1(const IppLUTParallelBody_LUTC1&);
188
    IppLUTParallelBody_LUTC1& operator=(const IppLUTParallelBody_LUTC1&);
189
};
190

191
class IppLUTParallelBody_LUTCN : public ParallelLoopBody
192
{
193
public:
194
    bool *ok;
195
    const Mat& src_;
196
    const Mat& lut_;
197
    Mat& dst_;
198

199
    int lutcn;
200

201
    uchar* lutBuffer;
202
    uchar* lutTable[4];
203

204
    IppLUTParallelBody_LUTCN(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
205
        : ok(_ok), src_(src), lut_(lut), dst_(dst), lutBuffer(NULL)
206
    {
207
        lutcn = lut.channels();
208
        IppiSize sz256 = {256, 1};
209

210
        size_t elemSize1 = dst.elemSize1();
211
        CV_DbgAssert(elemSize1 == 1);
212
        lutBuffer = (uchar*)CV_IPP_MALLOC(256 * (int)elemSize1 * 4);
213
        lutTable[0] = lutBuffer + 0;
214
        lutTable[1] = lutBuffer + 1 * 256 * elemSize1;
215
        lutTable[2] = lutBuffer + 2 * 256 * elemSize1;
216
        lutTable[3] = lutBuffer + 3 * 256 * elemSize1;
217

218
        CV_DbgAssert(lutcn == 3 || lutcn == 4);
219
        if (lutcn == 3)
220
        {
221
            IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C3P3R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256);
222
            if (status < 0)
223
                return;
224
        }
225
        else if (lutcn == 4)
226
        {
227
            IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C4P4R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256);
228
            if (status < 0)
229
                return;
230
        }
231

232
        *ok = true;
233
    }
234

235
    ~IppLUTParallelBody_LUTCN()
236
    {
237
        if (lutBuffer != NULL)
238
            ippFree(lutBuffer);
239
        lutBuffer = NULL;
240
        lutTable[0] = NULL;
241
    }
242

243
    void operator()( const cv::Range& range ) const
244
    {
245
        if (!*ok)
246
            return;
247

248
        const int row0 = range.start;
249
        const int row1 = range.end;
250

251
        Mat src = src_.rowRange(row0, row1);
252
        Mat dst = dst_.rowRange(row0, row1);
253

254
        if (lutcn == 3)
255
        {
256
            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C3R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0)
257
                return;
258
        }
259
        else if (lutcn == 4)
260
        {
261
            if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C4R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0)
262
                return;
263
        }
264
        *ok = false;
265
    }
266
private:
267
    IppLUTParallelBody_LUTCN(const IppLUTParallelBody_LUTCN&);
268
    IppLUTParallelBody_LUTCN& operator=(const IppLUTParallelBody_LUTCN&);
269
};
270
} // namespace ipp
271

272
// IPP path for LUT. Picks an IPP loop body from the table's channel count
// (1 channel, or 3/4 channels with 1-byte destination elements), runs it
// over all rows of dst - in parallel when dst.total() >> 18 is non-zero -
// and returns whether it succeeded. Returns false when src has more than
// two dimensions or no suitable body exists.
static bool ipp_lut(Mat &src, Mat &lut, Mat &dst)
{
    CV_INSTRUMENT_REGION_IPP();

    const int lutcn = lut.channels();

    if(src.dims > 2)
        return false;

    // Set by the body's constructor, cleared by workers on failure.
    bool ok = false;
    Ptr<ParallelLoopBody> body;

    const size_t elemSize1 = CV_ELEM_SIZE1(dst.depth());

    if (lutcn == 1)
    {
        ParallelLoopBody* impl = new ipp::IppLUTParallelBody_LUTC1(src, lut, dst, &ok);
        body.reset(impl);
    }
    else if ((lutcn == 3 || lutcn == 4) && elemSize1 == 1)
    {
        ParallelLoopBody* impl = new ipp::IppLUTParallelBody_LUTCN(src, lut, dst, &ok);
        body.reset(impl);
    }

    // No body for this configuration, or its set-up failed.
    if (body == NULL || !ok)
        return false;

    const Range all(0, dst.rows);
    const bool largeEnough = (dst.total() >> 18) != 0;
    if (largeEnough)
        parallel_for_(all, *body, (double)std::max((size_t)1, dst.total()>>16));
    else
        (*body)(all);

    // Workers clear `ok` if an IPP call failed.
    return ok;
}
310

311
#endif
312
#endif // IPP
313

314
class LUTParallelBody : public ParallelLoopBody
315
{
316
public:
317
    bool* ok;
318
    const Mat& src_;
319
    const Mat& lut_;
320
    Mat& dst_;
321

322
    LUTFunc func;
323

324
    LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
325
        : ok(_ok), src_(src), lut_(lut), dst_(dst)
326
    {
327
        func = lutTab[lut.depth()];
328
        *ok = (func != NULL);
329
    }
330

331
    void operator()( const cv::Range& range ) const CV_OVERRIDE
332
    {
333
        CV_DbgAssert(*ok);
334

335
        const int row0 = range.start;
336
        const int row1 = range.end;
337

338
        Mat src = src_.rowRange(row0, row1);
339
        Mat dst = dst_.rowRange(row0, row1);
340

341
        int cn = src.channels();
342
        int lutcn = lut_.channels();
343

344
        const Mat* arrays[] = {&src, &dst, 0};
345
        uchar* ptrs[2] = {};
346
        NAryMatIterator it(arrays, ptrs);
347
        int len = (int)it.size;
348

349
        for( size_t i = 0; i < it.nplanes; i++, ++it )
350
            func(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
351
    }
352
private:
353
    LUTParallelBody(const LUTParallelBody&);
354
    LUTParallelBody& operator=(const LUTParallelBody&);
355
};
356

357
} // cv::
358

359
// Performs a look-up table transform of an 8-bit array.
//   _src - input array of depth CV_8U or CV_8S
//   _lut - continuous table with 256 elements and either one channel or
//          the same number of channels as _src
//   _dst - output array; created with the dimensions of _src, the depth of
//          _lut and the channel count of _src
// The preconditions are enforced with CV_Assert. Accelerated back ends are
// attempted through the CV_OCL_RUN / CV_OVX_RUN / CV_IPP_RUN macros before
// the generic implementation (NOTE(review): those macros presumably return
// from this function when the back end succeeds - confirm in their
// definitions).
void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
{
    CV_INSTRUMENT_REGION();

    const int cn = _src.channels();
    const int depth = _src.depth();
    const int lutcn = _lut.channels();

    CV_Assert( (lutcn == cn || lutcn == 1) &&
        _lut.total() == 256 && _lut.isContinuous() &&
        (depth == CV_8U || depth == CV_8S) );

    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
               ocl_LUT(_src, _lut, _dst))

    Mat src = _src.getMat();
    Mat lut = _lut.getMat();
    _dst.create(src.dims, src.size, CV_MAKETYPE(_lut.depth(), cn));
    Mat dst = _dst.getMat();

    CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_TABLE_LOOKUP>(src.cols, src.rows),
               openvx_LUT(src, dst, lut))

#if !IPP_DISABLE_PERF_LUT
    CV_IPP_RUN(_src.dims() <= 2, ipp_lut(src, lut, dst));
#endif

    // Arrays with at most two dimensions: split the work by rows, using
    // parallel_for_ once dst holds at least 2^18 elements.
    if (_src.dims() <= 2)
    {
        bool ok = false;
        LUTParallelBody body(src, lut, dst, &ok);
        if (ok)
        {
            const Range all(0, dst.rows);
            const bool runParallel = dst.total() >= (size_t)(1<<18);
            if (runParallel)
                parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16));
            else
                body(all);
            if (ok)
                return;
        }
    }

    // Remaining cases: process plane by plane on the calling thread.
    LUTFunc func = lutTab[lut.depth()];
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &dst, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    const int len = (int)it.size;
    const uchar* table = lut.ptr();

    for( size_t plane = 0; plane < it.nplanes; plane++, ++it )
        func(ptrs[0], table, ptrs[1], len, cn, lutcn);
}
411

412
Product

Resources

Company