Path: blob/master/modules/dnn/src/layers/elementwise_layers.cpp
16337 views
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise)
arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "../op_halide.hpp"45#include "../op_inf_engine.hpp"46#include "../op_vkcom.hpp"47#include "opencv2/imgproc.hpp"48#include <opencv2/dnn/shape_utils.hpp>49#include <iostream>5051#ifdef HAVE_OPENCL52#include "opencl_kernels_dnn.hpp"53#endif5455namespace cv56{57namespace dnn58{5960using std::abs;61using std::exp;62using std::tanh;63using std::pow;6465template<typename Func>66class ElementWiseLayer : public Func::Layer67{68public:69class PBody : public cv::ParallelLoopBody70{71public:72const Func* func_;73const Mat* src_;74Mat* dst_;75int nstripes_;7677PBody(const Func &func, const Mat &src, Mat& dst, int nstripes)78{79func_ = &func;80src_ = &src;81dst_ = &dst;82nstripes_ = nstripes;83}8485void operator()(const Range &r) const CV_OVERRIDE86{87int nstripes = nstripes_, nsamples = 1, outCn = 1;88size_t planeSize = 1;8990if (src_->dims > 1)91{92nsamples = src_->size[0];93outCn = src_->size[1];94}95else96outCn = src_->size[0];9798for (int i = 2; i < src_->dims; ++i)99planeSize *= src_->size[i];100101size_t stripeSize = (planeSize + nstripes - 1)/nstripes;102size_t stripeStart = r.start*stripeSize;103size_t stripeEnd = std::min(r.end*stripeSize, planeSize);104105for( int i = 0; i < nsamples; i++ )106{107const float* srcptr = src_->ptr<float>(i) + stripeStart;108float* dstptr = dst_->ptr<float>(i) + stripeStart;109func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);110}111}112};113114ElementWiseLayer(const Func &f=Func()) : run_parallel(false) { func = f; }115116virtual bool supportBackend(int backendId) CV_OVERRIDE117{118return func.supportBackend(backendId, this->preferableTarget);119}120121virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE122{123switch (node->backendId)124{125case DNN_BACKEND_HALIDE:126{127#ifdef HAVE_HALIDE128auto 
base = node.dynamicCast<HalideBackendNode>();129Halide::Func& input = base->funcs.back();130Halide::Var x("x"), y("y"), c("c"), n("n");131Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));132func.attachHalide(input(x, y, c, n), top);133return Ptr<BackendNode>(new HalideBackendNode(base, top));134#endif // HAVE_HALIDE135break;136}137}138return Ptr<BackendNode>();139}140141virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE142{143#ifdef HAVE_HALIDE144Halide::Buffer<float> input = halideBuffer(inputs[0]);145Halide::Var x("x"), y("y"), c("c"), n("n");146Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));147func.attachHalide(input(x, y, c, n), top);148return Ptr<BackendNode>(new HalideBackendNode(top));149#endif // HAVE_HALIDE150return Ptr<BackendNode>();151}152153virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE154{155#ifdef HAVE_INF_ENGINE156InferenceEngine::LayerParams lp;157lp.name = this->name;158lp.precision = InferenceEngine::Precision::FP32;159return Ptr<BackendNode>(new InfEngineBackendNode(func.initInfEngine(lp)));160#endif // HAVE_INF_ENGINE161return Ptr<BackendNode>();162}163164virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE165{166#ifdef HAVE_VULKAN167return Ptr<BackendNode>(new VkComBackendNode(inputs, func.initVkCom()));168#endif // HAVE_VULKAN169return Ptr<BackendNode>();170}171172virtual bool tryFuse(Ptr<dnn::Layer>& top) CV_OVERRIDE173{174return func.tryFuse(top);175}176177void getScaleShift(Mat& scale_, Mat& shift_) const CV_OVERRIDE178{179func.getScaleShift(scale_, shift_);180}181182bool getMemoryShapes(const std::vector<MatShape> &inputs,183const int requiredOutputs,184std::vector<MatShape> &outputs,185std::vector<MatShape> &internals) const CV_OVERRIDE186{187Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);188return 
true;189}190191void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE192{193CV_TRACE_FUNCTION();194195CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),196func.applyOCL(inputs_arr, outputs_arr, internals_arr))197198if (inputs_arr.depth() == CV_16S)199{200Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);201return;202}203204std::vector<Mat> inputs, outputs;205inputs_arr.getMatVector(inputs);206outputs_arr.getMatVector(outputs);207208for (size_t i = 0; i < inputs.size(); i++)209{210const Mat &src = inputs[i];211Mat &dst = outputs[i];212CV_Assert(src.size == dst.size && src.type() == dst.type() &&213src.isContinuous() && dst.isContinuous() && src.type() == CV_32F);214215const int nstripes = getNumThreads();216PBody body(func, src, dst, nstripes);217parallel_for_(Range(0, nstripes), body, nstripes);218}219}220221void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE222{223func.apply(src, dst, len, planeSize, cn0, cn1);224}225226virtual int64 getFLOPS(const std::vector<MatShape> &inputs,227const std::vector<MatShape> &outputs) const CV_OVERRIDE228{229long flops = 0;230for (int i = 0; i < outputs.size(); i++)231{232flops += total(outputs[i]) * func.getFLOPSPerElement();233}234return flops;235}236237Func func;238bool run_parallel;239};240241#ifdef HAVE_OPENCL242static String oclGetTMacro(const UMat &m)243{244String str_name = ocl::typeToStr(m.type());245246if (str_name == "short")247str_name = "half";248249return format("-DT=%s -Dconvert_T=convert_%s ", str_name.c_str(), str_name.c_str());250}251#endif252253struct ReLUFunctor254{255typedef ReLULayer Layer;256float slope;257258explicit ReLUFunctor(float slope_=1.f) : slope(slope_) {}259260bool supportBackend(int backendId, int)261{262return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||263backendId == DNN_BACKEND_INFERENCE_ENGINE ||264backendId == 
DNN_BACKEND_VKCOM;265}266267void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const268{269float s = slope;270for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )271{272int i = 0;273#if CV_SIMD128274v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();275for( ; i <= len - 16; i += 16 )276{277v_float32x4 x0 = v_load(srcptr + i);278v_float32x4 x1 = v_load(srcptr + i + 4);279v_float32x4 x2 = v_load(srcptr + i + 8);280v_float32x4 x3 = v_load(srcptr + i + 12);281x0 = v_select(x0 >= z, x0, x0*s4);282x1 = v_select(x1 >= z, x1, x1*s4);283x2 = v_select(x2 >= z, x2, x2*s4);284x3 = v_select(x3 >= z, x3, x3*s4);285v_store(dstptr + i, x0);286v_store(dstptr + i + 4, x1);287v_store(dstptr + i + 8, x2);288v_store(dstptr + i + 12, x3);289}290#endif291for( ; i < len; i++ )292{293float x = srcptr[i];294dstptr[i] = x >= 0.f ? x : s*x;295}296}297}298299#ifdef HAVE_OPENCL300bool initKernel(ocl::Kernel &ker, const UMat &src) const301{302const char *buildoptSlope = (slope == 0) ? 
"-DRELU_NO_SLOPE" : "";303String buildopt = oclGetTMacro(src) + buildoptSlope;304305if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))306return false;307308if (slope != 0)309ker.set(3, (float)slope);310311return true;312}313314bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)315{316std::vector<UMat> inputs;317std::vector<UMat> outputs;318319inps.getUMatVector(inputs);320outs.getUMatVector(outputs);321322for (size_t i = 0; i < inputs.size(); i++)323{324UMat& src = inputs[i];325UMat& dst = outputs[i];326CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);327328ocl::Kernel kernel;329CV_Assert(initKernel(kernel, src));330kernel.set(0, (int)src.total());331kernel.set(1, ocl::KernelArg::PtrReadOnly(src));332kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));333334size_t gSize = src.total();335CV_Assert(kernel.run(1, &gSize, NULL, false));336}337338return true;339}340#endif341342#ifdef HAVE_HALIDE343void attachHalide(const Halide::Expr& input, Halide::Func& top)344{345Halide::Var x("x"), y("y"), c("c"), n("n");346if (slope)347{348top(x, y, c, n) = select(input >= 0.0f, input, slope * input);349}350else351{352top(x, y, c, n) = max(input, 0.0f);353}354}355#endif // HAVE_HALIDE356357#ifdef HAVE_INF_ENGINE358InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)359{360lp.type = "ReLU";361std::shared_ptr<InferenceEngine::ReLULayer> ieLayer(new InferenceEngine::ReLULayer(lp));362ieLayer->negative_slope = slope;363ieLayer->params["negative_slope"] = format("%f", slope);364return ieLayer;365}366#endif // HAVE_INF_ENGINE367368#ifdef HAVE_VULKAN369std::shared_ptr<vkcom::OpBase> initVkCom()370{371std::shared_ptr<vkcom::OpBase> op(new vkcom::OpReLU(slope));372return op;373}374#endif // HAVE_VULKAN375376377378bool tryFuse(Ptr<dnn::Layer>&) { return false; }379380void getScaleShift(Mat&, Mat&) const {}381382int64 getFLOPSPerElement() const { return 1; }383};384385struct 
ReLU6Functor386{387typedef ReLU6Layer Layer;388float minValue, maxValue;389390ReLU6Functor(float minValue_ = 0.0f, float maxValue_ = 6.0f)391: minValue(minValue_), maxValue(maxValue_)392{393CV_Assert(minValue <= maxValue);394}395396bool supportBackend(int backendId, int)397{398return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||399backendId == DNN_BACKEND_INFERENCE_ENGINE;400}401402void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const403{404for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )405{406int i = 0;407#if CV_SIMD128408v_float32x4 minV = v_setall_f32(minValue), maxV = v_setall_f32(maxValue);409for( ; i <= len - 16; i += 16 )410{411v_float32x4 x0 = v_load(srcptr + i);412v_float32x4 x1 = v_load(srcptr + i + 4);413v_float32x4 x2 = v_load(srcptr + i + 8);414v_float32x4 x3 = v_load(srcptr + i + 12);415x0 = v_min(v_max(minV, x0), maxV);416x1 = v_min(v_max(minV, x1), maxV);417x2 = v_min(v_max(minV, x2), maxV);418x3 = v_min(v_max(minV, x3), maxV);419v_store(dstptr + i, x0);420v_store(dstptr + i + 4, x1);421v_store(dstptr + i + 8, x2);422v_store(dstptr + i + 12, x3);423}424#endif425for( ; i < len; i++ )426{427float x = srcptr[i];428if (x >= minValue)429dstptr[i] = x <= maxValue ? 
x : maxValue;430else431dstptr[i] = minValue;432}433}434}435436#ifdef HAVE_OPENCL437bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)438{439std::vector<UMat> inputs;440std::vector<UMat> outputs;441442inps.getUMatVector(inputs);443outs.getUMatVector(outputs);444String buildopt = oclGetTMacro(inputs[0]);445446for (size_t i = 0; i < inputs.size(); i++)447{448UMat& src = inputs[i];449UMat& dst = outputs[i];450451ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);452kernel.set(0, (int)src.total());453kernel.set(1, ocl::KernelArg::PtrReadOnly(src));454kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));455kernel.set(3, (float)minValue);456kernel.set(4, (float)maxValue);457458size_t gSize = src.total();459CV_Assert(kernel.run(1, &gSize, NULL, false));460}461462return true;463}464#endif465466#ifdef HAVE_HALIDE467void attachHalide(const Halide::Expr& input, Halide::Func& top)468{469Halide::Var x("x"), y("y"), c("c"), n("n");470top(x, y, c, n) = clamp(input, minValue, maxValue);471}472#endif // HAVE_HALIDE473474#ifdef HAVE_INF_ENGINE475InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)476{477lp.type = "Clamp";478std::shared_ptr<InferenceEngine::ClampLayer> ieLayer(new InferenceEngine::ClampLayer(lp));479ieLayer->min_value = minValue;480ieLayer->max_value = maxValue;481ieLayer->params["min"] = format("%f", minValue);482ieLayer->params["max"] = format("%f", maxValue);483return ieLayer;484}485#endif // HAVE_INF_ENGINE486487#ifdef HAVE_VULKAN488std::shared_ptr<vkcom::OpBase> initVkCom()489{490// TODO: add vkcom implementation491return std::shared_ptr<vkcom::OpBase>();492}493#endif // HAVE_VULKAN494495bool tryFuse(Ptr<dnn::Layer>&) { return false; }496497void getScaleShift(Mat&, Mat&) const {}498499int64 getFLOPSPerElement() const { return 2; }500};501502struct TanHFunctor503{504typedef TanHLayer Layer;505506bool supportBackend(int backendId, int)507{508return backendId == 
DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||509backendId == DNN_BACKEND_INFERENCE_ENGINE;510}511512void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const513{514for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )515{516for( int i = 0; i < len; i++ )517{518float x = srcptr[i];519dstptr[i] = tanh(x);520}521}522}523524#ifdef HAVE_OPENCL525bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)526{527std::vector<UMat> inputs;528std::vector<UMat> outputs;529530inps.getUMatVector(inputs);531outs.getUMatVector(outputs);532String buildopt = oclGetTMacro(inputs[0]);533534for (size_t i = 0; i < inputs.size(); i++)535{536UMat& src = inputs[i];537UMat& dst = outputs[i];538539ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);540kernel.set(0, (int)src.total());541kernel.set(1, ocl::KernelArg::PtrReadOnly(src));542kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));543544size_t gSize = src.total();545CV_Assert(kernel.run(1, &gSize, NULL, false));546}547548return true;549}550#endif551552#ifdef HAVE_HALIDE553void attachHalide(const Halide::Expr& input, Halide::Func& top)554{555Halide::Var x("x"), y("y"), c("c"), n("n");556top(x, y, c, n) = tanh(input);557}558#endif // HAVE_HALIDE559560#ifdef HAVE_INF_ENGINE561InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)562{563lp.type = "TanH";564std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));565return ieLayer;566}567#endif // HAVE_INF_ENGINE568569#ifdef HAVE_VULKAN570std::shared_ptr<vkcom::OpBase> initVkCom()571{572// TODO: add vkcom implementation573return std::shared_ptr<vkcom::OpBase>();574}575#endif // HAVE_VULKAN576577bool tryFuse(Ptr<dnn::Layer>&) { return false; }578579void getScaleShift(Mat&, Mat&) const {}580581int64 getFLOPSPerElement() const { return 1; }582};583584struct SigmoidFunctor585{586typedef SigmoidLayer Layer;587588bool 
supportBackend(int backendId, int)589{590return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||591backendId == DNN_BACKEND_INFERENCE_ENGINE;592}593594void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const595{596for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )597{598for( int i = 0; i < len; i++ )599{600float x = srcptr[i];601dstptr[i] = 1.f/(1.f + exp(-x));602}603}604}605606#ifdef HAVE_OPENCL607bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)608{609std::vector<UMat> inputs;610std::vector<UMat> outputs;611612inps.getUMatVector(inputs);613outs.getUMatVector(outputs);614String buildopt = oclGetTMacro(inputs[0]);615616for (size_t i = 0; i < inputs.size(); i++)617{618UMat& src = inputs[i];619UMat& dst = outputs[i];620621ocl::Kernel kernel("SigmoidForward", ocl::dnn::activations_oclsrc, buildopt);622kernel.set(0, (int)src.total());623kernel.set(1, ocl::KernelArg::PtrReadOnly(src));624kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));625626size_t gSize = src.total();627CV_Assert(kernel.run(1, &gSize, NULL, false));628}629630return true;631}632#endif633634#ifdef HAVE_HALIDE635void attachHalide(const Halide::Expr& input, Halide::Func& top)636{637Halide::Var x("x"), y("y"), c("c"), n("n");638top(x, y, c, n) = 1.0f / (1.0f + exp(-input));639}640#endif // HAVE_HALIDE641642#ifdef HAVE_INF_ENGINE643InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)644{645lp.type = "Sigmoid";646std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));647return ieLayer;648}649#endif // HAVE_INF_ENGINE650651#ifdef HAVE_VULKAN652std::shared_ptr<vkcom::OpBase> initVkCom()653{654// TODO: add vkcom implementation655return std::shared_ptr<vkcom::OpBase>();656}657#endif // HAVE_VULKAN658659bool tryFuse(Ptr<dnn::Layer>&) { return false; }660661void getScaleShift(Mat&, Mat&) const {}662663int64 getFLOPSPerElement() 
const { return 3; }664};665666struct ELUFunctor667{668typedef ELULayer Layer;669670explicit ELUFunctor() {}671672bool supportBackend(int backendId, int)673{674return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||675backendId == DNN_BACKEND_INFERENCE_ENGINE;676}677678void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const679{680for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )681{682for(int i = 0; i < len; i++ )683{684float x = srcptr[i];685dstptr[i] = x >= 0.f ? x : exp(x) - 1;686}687}688}689690#ifdef HAVE_OPENCL691bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)692{693std::vector<UMat> inputs;694std::vector<UMat> outputs;695696inps.getUMatVector(inputs);697outs.getUMatVector(outputs);698String buildopt = oclGetTMacro(inputs[0]);699700for (size_t i = 0; i < inputs.size(); i++)701{702UMat& src = inputs[i];703UMat& dst = outputs[i];704705ocl::Kernel kernel("ELUForward", ocl::dnn::activations_oclsrc, buildopt);706kernel.set(0, (int)src.total());707kernel.set(1, ocl::KernelArg::PtrReadOnly(src));708kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));709710size_t gSize = src.total();711CV_Assert(kernel.run(1, &gSize, NULL, false));712}713714return true;715}716#endif717718#ifdef HAVE_HALIDE719void attachHalide(const Halide::Expr& input, Halide::Func& top)720{721Halide::Var x("x"), y("y"), c("c"), n("n");722top(x, y, c, n) = select(input >= 0.0f, input, exp(input) - 1);723}724#endif // HAVE_HALIDE725726#ifdef HAVE_INF_ENGINE727InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)728{729lp.type = "ELU";730return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp));731}732#endif // HAVE_INF_ENGINE733734#ifdef HAVE_VULKAN735std::shared_ptr<vkcom::OpBase> initVkCom()736{737// TODO: add vkcom implementation738return std::shared_ptr<vkcom::OpBase>();739}740#endif // HAVE_VULKAN741742bool 
tryFuse(Ptr<dnn::Layer>&) { return false; }743744void getScaleShift(Mat&, Mat&) const {}745746int64 getFLOPSPerElement() const { return 2; }747};748749struct AbsValFunctor750{751typedef AbsLayer Layer;752753bool supportBackend(int backendId, int)754{755return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;756}757758void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const759{760for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )761{762for( int i = 0; i < len; i++ )763{764float x = srcptr[i];765dstptr[i] = abs(x);766}767}768}769770#ifdef HAVE_OPENCL771bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)772{773std::vector<UMat> inputs;774std::vector<UMat> outputs;775776inps.getUMatVector(inputs);777outs.getUMatVector(outputs);778String buildopt = oclGetTMacro(inputs[0]);779780for (size_t i = 0; i < inputs.size(); i++)781{782UMat& src = inputs[i];783UMat& dst = outputs[i];784785ocl::Kernel kernel("AbsValForward", ocl::dnn::activations_oclsrc, buildopt);786kernel.set(0, (int)src.total());787kernel.set(1, ocl::KernelArg::PtrReadOnly(src));788kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));789790size_t gSize = src.total();791CV_Assert(kernel.run(1, &gSize, NULL, false));792}793794return true;795}796#endif797798#ifdef HAVE_HALIDE799void attachHalide(const Halide::Expr& input, Halide::Func& top)800{801Halide::Var x("x"), y("y"), c("c"), n("n");802top(x, y, c, n) = abs(input);803}804#endif // HAVE_HALIDE805806#ifdef HAVE_INF_ENGINE807InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)808{809CV_Error(Error::StsNotImplemented, "Abs");810return InferenceEngine::CNNLayerPtr();811}812#endif // HAVE_INF_ENGINE813814#ifdef HAVE_VULKAN815std::shared_ptr<vkcom::OpBase> initVkCom()816{817// TODO: add vkcom implementation818return std::shared_ptr<vkcom::OpBase>();819}820#endif // HAVE_VULKAN821822bool tryFuse(Ptr<dnn::Layer>&) { 
return false; }823824void getScaleShift(Mat&, Mat&) const {}825826int64 getFLOPSPerElement() const { return 1; }827};828829struct BNLLFunctor830{831typedef BNLLLayer Layer;832833bool supportBackend(int backendId, int)834{835return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;836}837838void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const839{840for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )841{842for( int i = 0; i < len; i++ )843{844float x = srcptr[i];845dstptr[i] = log(1.f + exp(-abs(x)));846}847}848}849850#ifdef HAVE_OPENCL851bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)852{853// TODO: implement OCL version854return false;855}856#endif857858#ifdef HAVE_HALIDE859void attachHalide(const Halide::Expr& input, Halide::Func& top)860{861Halide::Var x("x"), y("y"), c("c"), n("n");862top(x, y, c, n) = log(1.0f + exp(-abs(input)));863}864#endif // HAVE_HALIDE865866#ifdef HAVE_INF_ENGINE867InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)868{869CV_Error(Error::StsNotImplemented, "BNLL");870return InferenceEngine::CNNLayerPtr();871}872#endif // HAVE_INF_ENGINE873874#ifdef HAVE_VULKAN875std::shared_ptr<vkcom::OpBase> initVkCom()876{877// TODO: add vkcom implementation878return std::shared_ptr<vkcom::OpBase>();879}880#endif // HAVE_VULKAN881882bool tryFuse(Ptr<dnn::Layer>&) { return false; }883884void getScaleShift(Mat&, Mat&) const {}885886int64 getFLOPSPerElement() const { return 5; }887};888889struct PowerFunctor890{891typedef PowerLayer Layer;892893float power;894float scale;895float shift;896897explicit PowerFunctor(float power_ = 1.f, float scale_ = 1.f, float shift_ = 0.f)898: power(power_), scale(scale_), shift(shift_) {}899900bool supportBackend(int backendId, int targetId)901{902if (backendId == DNN_BACKEND_INFERENCE_ENGINE)903return (targetId != DNN_TARGET_OPENCL && targetId != 
DNN_TARGET_OPENCL_FP16) || power == 1.0;904else905return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;906}907908void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const909{910float a = scale, b = shift, p = power;911if( p == 1.f )912{913for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )914{915for( int i = 0; i < len; i++ )916{917float x = srcptr[i];918dstptr[i] = a*x + b;919}920}921}922else923{924for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )925{926for( int i = 0; i < len; i++ )927{928float x = srcptr[i];929dstptr[i] = pow(a*x + b, p);930}931}932}933}934935#ifdef HAVE_OPENCL936bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)937{938std::vector<UMat> inputs;939std::vector<UMat> outputs;940941inps.getUMatVector(inputs);942outs.getUMatVector(outputs);943String buildopt = oclGetTMacro(inputs[0]);944945for (size_t i = 0; i < inputs.size(); i++)946{947UMat& src = inputs[i];948UMat& dst = outputs[i];949950ocl::Kernel kernel("PowForward", ocl::dnn::activations_oclsrc, buildopt);951kernel.set(0, (int)src.total());952kernel.set(1, ocl::KernelArg::PtrReadOnly(src));953kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));954kernel.set(3, (float)power);955kernel.set(4, (float)scale);956kernel.set(5, (float)shift);957958size_t gSize = src.total();959CV_Assert(kernel.run(1, &gSize, NULL, false));960}961962return true;963}964#endif965966#ifdef HAVE_HALIDE967void attachHalide(const Halide::Expr& input, Halide::Func& top)968{969Halide::Var x("x"), y("y"), c("c"), n("n");970Halide::Expr topExpr = (scale == 1.0f ? 
input : input * scale);971if (shift)972{973topExpr += shift;974}975if (power != 1.0f)976{977topExpr = pow(topExpr, power);978}979top(x, y, c, n) = topExpr;980}981#endif // HAVE_HALIDE982983#ifdef HAVE_INF_ENGINE984InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)985{986if (power == 1.0f && scale == 1.0f && shift == 0.0f)987{988// It looks like there is a bug in Inference Engine for DNN_TARGET_OPENCL and DNN_TARGET_OPENCL_FP16989// if power layer do nothing so we replace it to Identity.990lp.type = "Split";991return std::shared_ptr<InferenceEngine::SplitLayer>(new InferenceEngine::SplitLayer(lp));992}993else994{995lp.type = "Power";996std::shared_ptr<InferenceEngine::PowerLayer> ieLayer(new InferenceEngine::PowerLayer(lp));997ieLayer->power = power;998ieLayer->scale = scale;999ieLayer->offset = shift;1000return ieLayer;1001}1002}1003#endif // HAVE_INF_ENGINE10041005#ifdef HAVE_VULKAN1006std::shared_ptr<vkcom::OpBase> initVkCom()1007{1008// TODO: add vkcom implementation1009return std::shared_ptr<vkcom::OpBase>();1010}1011#endif // HAVE_VULKAN10121013bool tryFuse(Ptr<dnn::Layer>& top)1014{1015if (power != 1.0f && shift != 0.0f)1016return false;10171018Mat w, b;1019top->getScaleShift(w, b);1020if ((w.empty() && b.empty()) || w.total() > 1 || b.total() > 1)1021return false;10221023float nextScale = w.empty() ? 1.0f : w.at<float>(0);1024float nextShift = b.empty() ? 0.0f : b.at<float>(0);1025scale = std::pow(scale, power) * nextScale;1026shift = nextScale * shift + nextShift;1027return true;1028}10291030void getScaleShift(Mat& _scale, Mat& _shift) const1031{1032if (power == 1.0f)1033{1034_scale = Mat(1, 1, CV_32F, Scalar(scale));1035_shift = Mat(1, 1, CV_32F, Scalar(shift));1036}1037}10381039int64 getFLOPSPerElement() const { return power == 1 ? 
2 : 10; }1040};104110421043struct ChannelsPReLUFunctor1044{1045typedef ChannelsPReLULayer Layer;1046Mat scale;1047#ifdef HAVE_OPENCL1048UMat scale_umat;1049#endif10501051explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)1052{1053}10541055bool supportBackend(int backendId, int)1056{1057return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;1058}10591060void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const1061{1062CV_Assert(scale.isContinuous() && scale.type() == CV_32F);10631064const float* scaleptr = scale.ptr<float>();1065CV_Assert( 0 <= cn0 && cn0 < cn1 && cn1 <= (int)scale.total() );10661067for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )1068{1069float s = scaleptr[cn];1070int i = 0;1071#if CV_SIMD1281072v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();1073for( ; i <= len - 16; i += 16 )1074{1075v_float32x4 x0 = v_load(srcptr + i);1076v_float32x4 x1 = v_load(srcptr + i + 4);1077v_float32x4 x2 = v_load(srcptr + i + 8);1078v_float32x4 x3 = v_load(srcptr + i + 12);1079x0 = v_select(x0 >= z, x0, x0*s4);1080x1 = v_select(x1 >= z, x1, x1*s4);1081x2 = v_select(x2 >= z, x2, x2*s4);1082x3 = v_select(x3 >= z, x3, x3*s4);1083v_store(dstptr + i, x0);1084v_store(dstptr + i + 4, x1);1085v_store(dstptr + i + 8, x2);1086v_store(dstptr + i + 12, x3);1087}1088#endif1089for( ; i < len; i++ )1090{1091float x = srcptr[i];1092dstptr[i] = x >= 0.f ? 
x : s*x;1093}1094}1095}10961097#ifdef HAVE_OPENCL1098bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)1099{1100if (scale_umat.empty())1101scale.copyTo(scale_umat);11021103std::vector<UMat> inputs;1104std::vector<UMat> outputs;11051106inps.getUMatVector(inputs);1107outs.getUMatVector(outputs);1108String buildopt = oclGetTMacro(inputs[0]);11091110for (size_t i = 0; i < inputs.size(); i++)1111{1112UMat& src = inputs[i];1113UMat& dst = outputs[i];11141115ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);1116kernel.set(0, (int)src.total());1117kernel.set(1, (int)src.size[1]);1118kernel.set(2, (int)total(shape(src), 2));1119kernel.set(3, ocl::KernelArg::PtrReadOnly(src));1120kernel.set(4, ocl::KernelArg::PtrWriteOnly(dst));1121kernel.set(5, ocl::KernelArg::PtrReadOnly(scale_umat));11221123size_t gSize = src.total();1124CV_Assert(kernel.run(1, &gSize, NULL, false));1125}11261127return true;1128}1129#endif11301131#ifdef HAVE_HALIDE1132void attachHalide(const Halide::Expr& input, Halide::Func& top)1133{1134Halide::Var x("x"), y("y"), c("c"), n("n");1135auto weights = wrapToHalideBuffer(scale, {(int)scale.total()});1136top(x, y, c, n) = select(input >= 0.0f, input, weights(c) * input);1137}1138#endif // HAVE_HALIDE11391140#ifdef HAVE_INF_ENGINE1141InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)1142{1143CV_Error(Error::StsNotImplemented, "PReLU");1144return InferenceEngine::CNNLayerPtr();1145}1146#endif // HAVE_INF_ENGINE11471148#ifdef HAVE_VULKAN1149std::shared_ptr<vkcom::OpBase> initVkCom()1150{1151// TODO: add vkcom implementation1152return std::shared_ptr<vkcom::OpBase>();1153}1154#endif // HAVE_VULKAN11551156bool tryFuse(Ptr<dnn::Layer>&) { return false; }11571158void getScaleShift(Mat&, Mat&) const {}11591160int64 getFLOPSPerElement() const { return 1; }1161};11621163#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) 
\1164Ptr<_Layer> _Layer::create() { \1165return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }116611671168Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)1169{1170float negativeSlope = params.get<float>("negative_slope", 0.f);1171Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));1172l->setParamsFrom(params);1173l->negativeSlope = negativeSlope;11741175return l;1176}11771178Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)1179{1180float minValue = params.get<float>("min_value", 0.0f);1181float maxValue = params.get<float>("max_value", 6.0f);1182Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));1183l->setParamsFrom(params);1184l->minValue = minValue;1185l->maxValue = maxValue;11861187return l;1188}11891190Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)1191{1192Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());1193l->setParamsFrom(params);11941195return l;1196}11971198Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)1199{1200Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());1201l->setParamsFrom(params);12021203return l;1204}12051206Ptr<ELULayer> ELULayer::create(const LayerParams& params)1207{1208Ptr<ELULayer> l(new ElementWiseLayer<ELUFunctor>(ELUFunctor()));1209l->setParamsFrom(params);12101211return l;1212}12131214Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)1215{1216Ptr<AbsLayer> l(new ElementWiseLayer<AbsValFunctor>());1217l->setParamsFrom(params);12181219return l;1220}12211222Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)1223{1224Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>());1225l->setParamsFrom(params);12261227return l;1228}12291230Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)1231{1232float power = params.get<float>("power", 1.0f);1233float scale = params.get<float>("scale", 1.0f);1234float shift = params.get<float>("shift", 0.0f);1235Ptr<PowerLayer> l(new 
ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));1236l->setParamsFrom(params);1237l->power = power;1238l->scale = scale;1239l->shift = shift;12401241return l;1242}12431244Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)1245{1246CV_Assert(params.blobs.size() == 1);1247if (params.blobs[0].total() == 1)1248{1249LayerParams reluParams = params;1250reluParams.set("negative_slope", params.blobs[0].at<float>(0));1251return ReLULayer::create(reluParams);1252}1253Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));1254l->setParamsFrom(params);12551256return l;1257}12581259}1260}126112621263