// Path: modules/dnn/src/layers/lrn_layer.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2013, OpenCV Foundation, all rights reserved.13// Copyright (C) 2017, Intel Corporation, all rights reserved.14// Third party copyrights are property of their respective owners.15//16// Redistribution and use in source and binary forms, with or without modification,17// are permitted provided that the following conditions are met:18//19// * Redistribution's of source code must retain the above copyright notice,20// this list of conditions and the following disclaimer.21//22// * Redistribution's in binary form must reproduce the above copyright notice,23// this list of conditions and the following disclaimer in the documentation24// and/or other materials provided with the distribution.25//26// * The name of the copyright holders may not be used to endorse or promote products27// derived from this software without specific prior written permission.28//29// This software is provided by the copyright holders and contributors "as is" and30// any express or implied warranties, including, but not limited to, the implied31// warranties of merchantability and fitness for a particular purpose are disclaimed.32// In no event shall the Intel Corporation or contributors be liable for any direct,33// indirect, incidental, special, exemplary, or consequential damages34// (including, but not limited to, procurement of substitute goods or services;35// loss of use, data, or profits; or business interruption) however caused36// and on any theory of liability, whether in contract, strict liability,37// or tort (including negligence or otherwise) 
arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "../op_halide.hpp"45#include "../op_inf_engine.hpp"46#include "../op_vkcom.hpp"47#include "opencv2/imgproc.hpp"48#include "opencv2/dnn/shape_utils.hpp"49#include "opencv2/core/hal/hal.hpp"50#include <algorithm>5152#ifdef HAVE_OPENCL53#include "opencl_kernels_dnn.hpp"54using namespace cv::dnn::ocl4dnn;55#endif5657namespace cv58{59namespace dnn60{6162class LRNLayerImpl CV_FINAL : public LRNLayer63{64public:65LRNLayerImpl(const LayerParams& params)66{67setParamsFrom(params);68type = -1;69String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");70if (nrmType == "ACROSS_CHANNELS")71type = CHANNEL_NRM;72else if (nrmType == "WITHIN_CHANNEL")73type = SPATIAL_NRM;74else75CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");7677size = params.get<int>("local_size", 5);78if (size % 2 != 1 || size <= 0)79CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");8081alpha = params.get<double>("alpha", 1);82beta = params.get<double>("beta", 0.75);83bias = params.get<double>("bias", 1);84normBySize = params.get<bool>("norm_by_size", true);85}8687#ifdef HAVE_OPENCL88Ptr<OCL4DNNLRN<float> > lrnOp;89#endif9091virtual bool supportBackend(int backendId) CV_OVERRIDE92{93return backendId == DNN_BACKEND_OPENCV ||94backendId == DNN_BACKEND_HALIDE ||95backendId == DNN_BACKEND_INFERENCE_ENGINE && (preferableTarget != DNN_TARGET_MYRIAD || type == CHANNEL_NRM) ||96backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM);97}9899#ifdef HAVE_OPENCL100virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE101{102lrnOp.release();103}104105bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)106{107std::vector<UMat> inputs;108std::vector<UMat> 
outputs;109110bool use_half = (inps.depth() == CV_16S);111inps.getUMatVector(inputs);112outs.getUMatVector(outputs);113114if (lrnOp.empty())115{116OCL4DNNLRNConfig config;117config.lrn_type = type == CHANNEL_NRM ?118LRNParameter_NormRegion_ACROSS_CHANNELS :119LRNParameter_NormRegion_WITHIN_CHANNEL;120121CHECK_EQ(size % 2, 1)<< "LRN only supports odd values for local_size";122config.local_size = size;123config.alpha = alpha;124config.beta = beta;125config.k = bias;126CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "127<< "corresponding to (num, channels, height, width)";128config.batch_size = inputs[0].size[0];129config.channels = inputs[0].size[1];130config.height = inputs[0].size[2];131config.width = inputs[0].size[3];132config.norm_by_size = normBySize;133config.use_half = use_half;134135lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));136}137138if (!lrnOp->Forward(inputs[0], outputs[0]))139return false;140141return true;142}143#endif144145void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE146{147CV_TRACE_FUNCTION();148CV_TRACE_ARG_VALUE(name, "name", name.c_str());149150CV_Assert(inputs_arr.total() == outputs_arr.total());151152CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),153forward_ocl(inputs_arr, outputs_arr, internals_arr))154155if (inputs_arr.depth() == CV_16S)156{157forward_fallback(inputs_arr, outputs_arr, internals_arr);158return;159}160161std::vector<Mat> inputs, outputs;162inputs_arr.getMatVector(inputs);163outputs_arr.getMatVector(outputs);164165CV_Assert(inputs.size() == outputs.size());166167for (int i = 0; i < inputs.size(); i++)168{169CV_Assert(inputs[i].dims == 4);170171Mat &src = inputs[i];172Mat &dst = outputs[i];173174switch (type)175{176case CHANNEL_NRM:177channelNormalization(src, dst);178break;179case SPATIAL_NRM:180spatialNormalization(src, dst);181break;182default:183CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN 
layer");184break;185}186}187}188189class ChannelLRN : public ParallelLoopBody190{191public:192ChannelLRN(const float* src, float* dst, int channels, int ksize,193float alpha1, float bias1, float beta1,194size_t planeSize, int nsamples, int nstripes)195{196src_ = src; dst_ = dst;197channels_ = channels;198ksize_ = ksize;199alpha1_ = alpha1; bias1_ = bias1; beta1_ = beta1;200planeSize_ = planeSize; nsamples_ = nsamples; nstripes_ = nstripes;201}202203void operator()(const Range& r) const CV_OVERRIDE204{205int nsamples = nsamples_, nstripes = nstripes_;206size_t planeSize = planeSize_, planeSize_n = planeSize * nsamples;207size_t elemsPerStripe = (planeSize_n + nstripes - 1)/nstripes;208size_t rstart = r.start*elemsPerStripe;209size_t rend = r.end == nstripes ? planeSize_n : r.end*elemsPerStripe;210rstart = std::min(rstart, planeSize_n);211rend = std::min(rend, planeSize_n);212float alpha1 = alpha1_, bias1 = bias1_, beta1 = beta1_;213int k, channels = channels_, ksize = ksize_;214215AutoBuffer<float> buf_((channels + ksize + 1)*2);216float* acc = buf_.data();217float* buf = acc + channels + ksize + 1;218for( k = 0; k <= ksize; k++ )219buf[-k-1] = buf[channels + k] = 0.f;220221for( size_t ofs = rstart; ofs < rend; )222{223int sampleIdx = (int)(ofs/planeSize);224if( sampleIdx >= nsamples )225break;226size_t ofs0 = ofs - sampleIdx*planeSize;227size_t ofs1 = std::min(planeSize - ofs0, rend - ofs) + ofs;228const float* src = src_ + sampleIdx*planeSize*channels + ofs0;229float* dst = dst_ + sampleIdx*planeSize*channels + ofs0;230231for( ; ofs < ofs1; ofs++, src++, dst++ )232{233for( k = 0; k < channels; k++ )234buf[k] = src[k*planeSize];235float s = 0;236for( k = 0; k < ksize; k++ )237s += buf[k]*buf[k];238for( k = 0; k < channels; k++ )239{240float x1 = buf[k + ksize];241float x0 = buf[k - ksize - 1];242s = std::max(s + (x1 + x0)*(x1 - x0), 0.f);243acc[k] = (float)(alpha1*s + bias1);244}245246hal::log32f(acc, acc, channels);247for( k = 0; k < channels; k++ )248acc[k] *= 
beta1;249hal::exp32f(acc, acc, channels);250251for( k = 0; k < channels; k++ )252dst[k*planeSize] = buf[k]*acc[k];253}254}255}256257const float* src_;258float* dst_;259float alpha1_, bias1_, beta1_;260size_t planeSize_;261int channels_, ksize_, nsamples_, nstripes_;262};263264void channelNormalization(Mat &srcBlob, Mat &dstBlob)265{266int num = srcBlob.size[0];267int channels = srcBlob.size[1];268int ksize = (size - 1) / 2;269int sizeNormFactor = normBySize ? size : 1;270size_t planeSize = srcBlob.size[2]*srcBlob.size[3];271272int nstripes = std::max(getNumThreads(), 1);273274ChannelLRN clrn(srcBlob.ptr<float>(), dstBlob.ptr<float>(), channels,275ksize, alpha/sizeNormFactor, bias, -beta, planeSize, num, nstripes);276parallel_for_(Range(0, nstripes), clrn, nstripes);277}278279void sqrBoxFilter_(const Mat &src, Mat &dst)280{281Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);282cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);283}284285void spatialNormalization(Mat &srcBlob, Mat &dstBlob)286{287int num = srcBlob.size[0];288int channels = srcBlob.size[1];289int sizeNormFactor = normBySize ? 
size*size : 1;290291Mat srcMat = srcBlob;292Mat dstMat = dstBlob;293294for (int n = 0; n < num; n++)295{296for (int cn = 0; cn < channels; cn++)297{298Mat src = getPlane(srcMat, n, cn);299Mat dst = getPlane(dstMat, n, cn);300301sqrBoxFilter_(src, dst);302303dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);304cv::pow(dst, beta, dst);305cv::divide(src, dst, dst);306}307}308}309310virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE311{312#ifdef HAVE_VULKAN313std::shared_ptr<vkcom::OpBase> op(new vkcom::OpLRN(size / 2, bias, alpha, beta, normBySize));314return Ptr<BackendNode>(new VkComBackendNode(inputs, op));315#endif316return Ptr<BackendNode>();317}318319virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE320{321#ifdef HAVE_HALIDE322float alphaSize = alpha;323if (normBySize)324alphaSize /= (type == CHANNEL_NRM ? size : size * size);325int width, height, channels, numImgs;326Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);327getCanonicalSize(inputBuffer, &width, &height, &channels, &numImgs);328329Halide::Var x("x"), y("y"), c("c"), n("n");330Halide::Func top = (name.empty() ? 
Halide::Func() : Halide::Func(name));331Halide::Func padded_sq(name + "_padded_sq");332Halide::Func sq("sq");333sq(x, y, c, n) = inputBuffer(x, y, c, n) * inputBuffer(x, y, c, n);334335Halide::Func bounded =336Halide::BoundaryConditions::constant_exterior(sq, 0, 0, width,3370, height,3380, channels,3390, numImgs);340padded_sq(x, y, c, n) = bounded(x, y, c, n);341342Halide::Expr base;343if (type == CHANNEL_NRM)344{345Halide::RDom r((1 - size) / 2, size);346base = alphaSize * sum(padded_sq(x, y, c + r, n));347}348else // SPATIAL_NRM349{350Halide::RDom r((1 - size) / 2, size, (1 - size) / 2, size);351base = alphaSize * sum(padded_sq(x + r.x, y + r.y, c, n));352}353base += static_cast<float>(bias);354top(x, y, c, n) = inputBuffer(x, y, c, n) / pow(base, beta);355return Ptr<BackendNode>(new HalideBackendNode({ padded_sq, top }));356#endif // HAVE_HALIDE357return Ptr<BackendNode>();358}359360virtual void applyHalideScheduler(Ptr<BackendNode>& node,361const std::vector<Mat*> &inputs,362const std::vector<Mat> &outputs,363int targetId) const CV_OVERRIDE364{365#ifdef HAVE_HALIDE366if (targetId != DNN_TARGET_CPU)367{368Layer::applyHalideScheduler(node, inputs, outputs, targetId);369return;370}371int outW, outH, outC, outN;372getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);373374Halide::Var x("x"), y("y"), c("c"), n("n"), yo("yo"), yi("yi"), tile("tile");375Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs[1];376Halide::Func& padded_sq = node.dynamicCast<HalideBackendNode>()->funcs[0];377378if (outW < 8 || outH <= 2)379return;380381top.reorder(x, c, y, n)382.split(y, yo, yi, 2)383.fuse(yo, n, tile)384.parallel(tile)385.unroll(yi)386.vectorize(x, 8);387padded_sq.store_at(top, tile)388.compute_at(top, yi);389#endif // HAVE_HALIDE390}391392virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE393{394#ifdef HAVE_INF_ENGINE395InferenceEngine::LayerParams lp;396lp.name = name;397lp.type = "Norm";398lp.precision = 
InferenceEngine::Precision::FP32;399std::shared_ptr<InferenceEngine::NormLayer> ieLayer(new InferenceEngine::NormLayer(lp));400401ieLayer->_size = size;402ieLayer->_k = (int)bias;403ieLayer->_beta = beta;404ieLayer->_alpha = alpha;405ieLayer->_isAcrossMaps = (type == CHANNEL_NRM);406return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));407#endif // HAVE_INF_ENGINE408return Ptr<BackendNode>();409}410411virtual int64 getFLOPS(const std::vector<MatShape> &inputs,412const std::vector<MatShape> &outputs) const CV_OVERRIDE413{414CV_UNUSED(outputs); // suppress unused variable warning415CV_Assert(inputs.size() > 0);416long flops = 0;417418for(int i = 0; i < inputs.size(); i++)419{420if (type == CHANNEL_NRM)421{422int channels = inputs[i][1];423int ksize = (size - 1) / 2;424425flops += inputs[i][0]*(std::min(ksize, channels)*2*total(inputs[i], 2) + channels*4*total(inputs[i], 2));426427if (ksize < channels)428{429flops += (size + 2*(channels - size))*total(inputs[i], 2);430}431}432else433{434flops += total(inputs[i])*(2*size*size + 2);435}436}437return flops;438}439440private:441enum Type442{443CHANNEL_NRM,444SPATIAL_NRM445};446};447448Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)449{450return Ptr<LRNLayer>(new LRNLayerImpl(params));451}452453}454}455456457