// Path: modules/dnn/src/layers/lrn_layer.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2013, OpenCV Foundation, all rights reserved.13// Copyright (C) 2017, Intel Corporation, all rights reserved.14// Third party copyrights are property of their respective owners.15//16// Redistribution and use in source and binary forms, with or without modification,17// are permitted provided that the following conditions are met:18//19// * Redistribution's of source code must retain the above copyright notice,20// this list of conditions and the following disclaimer.21//22// * Redistribution's in binary form must reproduce the above copyright notice,23// this list of conditions and the following disclaimer in the documentation24// and/or other materials provided with the distribution.25//26// * The name of the copyright holders may not be used to endorse or promote products27// derived from this software without specific prior written permission.28//29// This software is provided by the copyright holders and contributors "as is" and30// any express or implied warranties, including, but not limited to, the implied31// warranties of merchantability and fitness for a particular purpose are disclaimed.32// In no event shall the Intel Corporation or contributors be liable for any direct,33// indirect, incidental, special, exemplary, or consequential damages34// (including, but not limited to, procurement of substitute goods or services;35// loss of use, data, or profits; or business interruption) however caused36// and on any theory of liability, whether in contract, strict liability,37// or tort (including negligence or otherwise) 
arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "../op_halide.hpp"45#include "../op_inf_engine.hpp"46#include "../op_vkcom.hpp"47#include "opencv2/imgproc.hpp"48#include "opencv2/dnn/shape_utils.hpp"49#include "opencv2/core/hal/hal.hpp"50#include <algorithm>5152#ifdef HAVE_OPENCL53#include "opencl_kernels_dnn.hpp"54using namespace cv::dnn::ocl4dnn;55#endif5657namespace cv58{59namespace dnn60{6162class LRNLayerImpl CV_FINAL : public LRNLayer63{64public:65LRNLayerImpl(const LayerParams& params)66{67setParamsFrom(params);68type = -1;69String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");70if (nrmType == "ACROSS_CHANNELS")71type = CHANNEL_NRM;72else if (nrmType == "WITHIN_CHANNEL")73type = SPATIAL_NRM;74else75CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");7677size = params.get<int>("local_size", 5);78if (size % 2 != 1 || size <= 0)79CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");8081alpha = params.get<double>("alpha", 1);82beta = params.get<double>("beta", 0.75);83bias = params.get<double>("bias", 1);84normBySize = params.get<bool>("norm_by_size", true);85}8687#ifdef HAVE_OPENCL88Ptr<OCL4DNNLRN<float> > lrnOp;89#endif9091virtual bool supportBackend(int backendId) CV_OVERRIDE92{93return backendId == DNN_BACKEND_OPENCV ||94backendId == DNN_BACKEND_HALIDE ||95backendId == DNN_BACKEND_INFERENCE_ENGINE && (preferableTarget != DNN_TARGET_MYRIAD || type == CHANNEL_NRM) ||96backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM);97}9899#ifdef HAVE_OPENCL100virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE101{102lrnOp.release();103}104105bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)106{107std::vector<UMat> inputs;108std::vector<UMat> 
outputs;109110bool use_half = (inps.depth() == CV_16S);111inps.getUMatVector(inputs);112outs.getUMatVector(outputs);113114if (lrnOp.empty())115{116OCL4DNNLRNConfig config;117config.lrn_type = type == CHANNEL_NRM ?118LRNParameter_NormRegion_ACROSS_CHANNELS :119LRNParameter_NormRegion_WITHIN_CHANNEL;120121CHECK_EQ(size % 2, 1)<< "LRN only supports odd values for local_size";122config.local_size = size;123config.alpha = alpha;124config.beta = beta;125config.k = bias;126CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "127<< "corresponding to (num, channels, height, width)";128config.batch_size = inputs[0].size[0];129config.channels = inputs[0].size[1];130config.height = inputs[0].size[2];131config.width = inputs[0].size[3];132config.norm_by_size = normBySize;133config.use_half = use_half;134135lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));136}137138if (!lrnOp->Forward(inputs[0], outputs[0]))139return false;140141return true;142}143#endif144145void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE146{147CV_TRACE_FUNCTION();148CV_TRACE_ARG_VALUE(name, "name", name.c_str());149150CV_Assert(inputs_arr.total() == outputs_arr.total());151152CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),153forward_ocl(inputs_arr, outputs_arr, internals_arr))154155if (inputs_arr.depth() == CV_16S)156{157forward_fallback(inputs_arr, outputs_arr, internals_arr);158return;159}160161std::vector<Mat> inputs, outputs;162inputs_arr.getMatVector(inputs);163outputs_arr.getMatVector(outputs);164165CV_Assert(inputs.size() == outputs.size());166167for (int i = 0; i < inputs.size(); i++)168{169CV_Assert(inputs[i].dims == 4);170171Mat &src = inputs[i];172Mat &dst = outputs[i];173174switch (type)175{176case CHANNEL_NRM:177channelNormalization(src, dst);178break;179case SPATIAL_NRM:180spatialNormalization(src, dst);181break;182default:183CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN 
layer");184break;185}186}187}188189class ChannelLRN : public ParallelLoopBody190{191public:192ChannelLRN(const float* src, float* dst, int channels, int ksize,193float alpha1, float bias1, float beta1,194size_t planeSize, int nsamples, int nstripes)195{196src_ = src; dst_ = dst;197channels_ = channels;198ksize_ = ksize;199alpha1_ = alpha1; bias1_ = bias1; beta1_ = beta1;200planeSize_ = planeSize; nsamples_ = nsamples; nstripes_ = nstripes;201}202203void operator()(const Range& r) const CV_OVERRIDE204{205int nsamples = nsamples_, nstripes = nstripes_;206size_t planeSize = planeSize_, planeSize_n = planeSize * nsamples;207size_t elemsPerStripe = (planeSize_n + nstripes - 1)/nstripes;208size_t rstart = r.start*elemsPerStripe;209size_t rend = r.end == nstripes ? planeSize_n : r.end*elemsPerStripe;210rstart = std::min(rstart, planeSize_n);211rend = std::min(rend, planeSize_n);212float alpha1 = alpha1_, bias1 = bias1_, beta1 = beta1_;213int k, channels = channels_, ksize = ksize_;214215AutoBuffer<float> buf_((channels + ksize + 1)*2);216float* acc = buf_.data();217float* buf = acc + channels + ksize + 1;218for( k = 0; k <= ksize; k++ )219buf[-k-1] = buf[channels + k] = 0.f;220221for( size_t ofs = rstart; ofs < rend; )222{223int sampleIdx = (int)(ofs/planeSize);224if( sampleIdx >= nsamples )225break;226size_t ofs0 = ofs - sampleIdx*planeSize;227size_t ofs1 = std::min(planeSize - ofs0, rend - ofs) + ofs;228const float* src = src_ + sampleIdx*planeSize*channels + ofs0;229float* dst = dst_ + sampleIdx*planeSize*channels + ofs0;230231for( ; ofs < ofs1; ofs++, src++, dst++ )232{233for( k = 0; k < channels; k++ )234buf[k] = src[k*planeSize];235float s = 0;236for( k = 0; k < ksize; k++ )237s += buf[k]*buf[k];238for( k = 0; k < channels; k++ )239{240float x1 = buf[k + ksize];241float x0 = buf[k - ksize - 1];242s = std::max(s + (x1 + x0)*(x1 - x0), 0.f);243acc[k] = (float)(alpha1*s + bias1);244}245246hal::log32f(acc, acc, channels);247for( k = 0; k < channels; k++ )248acc[k] *= 
beta1;249hal::exp32f(acc, acc, channels);250251for( k = 0; k < channels; k++ )252dst[k*planeSize] = buf[k]*acc[k];253}254}255}256257const float* src_;258float* dst_;259float alpha1_, bias1_, beta1_;260size_t planeSize_;261int channels_, ksize_, nsamples_, nstripes_;262};263264void channelNormalization(Mat &srcBlob, Mat &dstBlob)265{266int num = srcBlob.size[0];267int channels = srcBlob.size[1];268int ksize = (size - 1) / 2;269int sizeNormFactor = normBySize ? size : 1;270size_t planeSize = srcBlob.size[2]*srcBlob.size[3];271272int nstripes = std::max(getNumThreads(), 1);273274ChannelLRN clrn(srcBlob.ptr<float>(), dstBlob.ptr<float>(), channels,275ksize, alpha/sizeNormFactor, bias, -beta, planeSize, num, nstripes);276parallel_for_(Range(0, nstripes), clrn, nstripes);277}278279void sqrBoxFilter_(const Mat &src, Mat &dst)280{281Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);282cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);283}284285void spatialNormalization(Mat &srcBlob, Mat &dstBlob)286{287int num = srcBlob.size[0];288int channels = srcBlob.size[1];289int sizeNormFactor = normBySize ? 
size*size : 1;290291Mat srcMat = srcBlob;292Mat dstMat = dstBlob;293294for (int n = 0; n < num; n++)295{296for (int cn = 0; cn < channels; cn++)297{298Mat src = getPlane(srcMat, n, cn);299Mat dst = getPlane(dstMat, n, cn);300301sqrBoxFilter_(src, dst);302303dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);304cv::pow(dst, beta, dst);305cv::divide(src, dst, dst);306}307}308}309310virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE311{312#ifdef HAVE_VULKAN313std::shared_ptr<vkcom::OpBase> op(new vkcom::OpLRN(size / 2, bias, alpha, beta, normBySize));314return Ptr<BackendNode>(new VkComBackendNode(inputs, op));315#endif316return Ptr<BackendNode>();317}318319virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE320{321#ifdef HAVE_HALIDE322float alphaSize = alpha;323if (normBySize)324alphaSize /= (type == CHANNEL_NRM ? size : size * size);325int width, height, channels, numImgs;326Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);327getCanonicalSize(inputBuffer, &width, &height, &channels, &numImgs);328329Halide::Var x("x"), y("y"), c("c"), n("n");330Halide::Func top = (name.empty() ? 
Halide::Func() : Halide::Func(name));331Halide::Func padded_sq(name + "_padded_sq");332Halide::Func sq("sq");333sq(x, y, c, n) = inputBuffer(x, y, c, n) * inputBuffer(x, y, c, n);334335Halide::Func bounded =336Halide::BoundaryConditions::constant_exterior(sq, 0, 0, width,3370, height,3380, channels,3390, numImgs);340padded_sq(x, y, c, n) = bounded(x, y, c, n);341342Halide::Expr base;343if (type == CHANNEL_NRM)344{345Halide::RDom r((1 - size) / 2, size);346base = alphaSize * sum(padded_sq(x, y, c + r, n));347}348else // SPATIAL_NRM349{350Halide::RDom r((1 - size) / 2, size, (1 - size) / 2, size);351base = alphaSize * sum(padded_sq(x + r.x, y + r.y, c, n));352}353base += static_cast<float>(bias);354top(x, y, c, n) = inputBuffer(x, y, c, n) / pow(base, beta);355return Ptr<BackendNode>(new HalideBackendNode({ padded_sq, top }));356#endif // HAVE_HALIDE357return Ptr<BackendNode>();358}359360virtual void applyHalideScheduler(Ptr<BackendNode>& node,361const std::vector<Mat*> &inputs,362const std::vector<Mat> &outputs,363int targetId) const CV_OVERRIDE364{365#ifdef HAVE_HALIDE366if (targetId != DNN_TARGET_CPU)367{368Layer::applyHalideScheduler(node, inputs, outputs, targetId);369return;370}371int outW, outH, outC, outN;372getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);373374Halide::Var x("x"), y("y"), c("c"), n("n"), yo("yo"), yi("yi"), tile("tile");375Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs[1];376Halide::Func& padded_sq = node.dynamicCast<HalideBackendNode>()->funcs[0];377378if (outW < 8 || outH <= 2)379return;380381top.reorder(x, c, y, n)382.split(y, yo, yi, 2)383.fuse(yo, n, tile)384.parallel(tile)385.unroll(yi)386.vectorize(x, 8);387padded_sq.store_at(top, tile)388.compute_at(top, yi);389#endif // HAVE_HALIDE390}391392virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE393{394#ifdef HAVE_INF_ENGINE395InferenceEngine::LayerParams lp;396lp.name = name;397lp.type = "Norm";398lp.precision = 
InferenceEngine::Precision::FP32;399std::shared_ptr<InferenceEngine::NormLayer> ieLayer(new InferenceEngine::NormLayer(lp));400401ieLayer->_size = size;402ieLayer->_k = (int)bias;403ieLayer->_beta = beta;404ieLayer->_alpha = alpha;405ieLayer->_isAcrossMaps = (type == CHANNEL_NRM);406return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));407#endif // HAVE_INF_ENGINE408return Ptr<BackendNode>();409}410411virtual int64 getFLOPS(const std::vector<MatShape> &inputs,412const std::vector<MatShape> &outputs) const CV_OVERRIDE413{414CV_UNUSED(outputs); // suppress unused variable warning415CV_Assert(inputs.size() > 0);416long flops = 0;417418for(int i = 0; i < inputs.size(); i++)419{420if (type == CHANNEL_NRM)421{422int channels = inputs[i][1];423int ksize = (size - 1) / 2;424425flops += inputs[i][0]*(std::min(ksize, channels)*2*total(inputs[i], 2) + channels*4*total(inputs[i], 2));426427if (ksize < channels)428{429flops += (size + 2*(channels - size))*total(inputs[i], 2);430}431}432else433{434flops += total(inputs[i])*(2*size*size + 2);435}436}437return flops;438}439440private:441enum Type442{443CHANNEL_NRM,444SPATIAL_NRM445};446};447448Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)449{450return Ptr<LRNLayer>(new LRNLayerImpl(params));451}452453}454}455456457