Path: blob/master/modules/dnn/src/layers/eltwise_layer.cpp
16337 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2013, OpenCV Foundation, all rights reserved.13// Copyright (C) 2017, Intel Corporation, all rights reserved.14// Third party copyrights are property of their respective owners.15//16// Redistribution and use in source and binary forms, with or without modification,17// are permitted provided that the following conditions are met:18//19// * Redistribution's of source code must retain the above copyright notice,20// this list of conditions and the following disclaimer.21//22// * Redistribution's in binary form must reproduce the above copyright notice,23// this list of conditions and the following disclaimer in the documentation24// and/or other materials provided with the distribution.25//26// * The name of the copyright holders may not be used to endorse or promote products27// derived from this software without specific prior written permission.28//29// This software is provided by the copyright holders and contributors "as is" and30// any express or implied warranties, including, but not limited to, the implied31// warranties of merchantability and fitness for a particular purpose are disclaimed.32// In no event shall the Intel Corporation or contributors be liable for any direct,33// indirect, incidental, special, exemplary, or consequential damages34// (including, but not limited to, procurement of substitute goods or services;35// loss of use, data, or profits; or business interruption) however caused36// and on any theory of liability, whether in contract, strict liability,37// or tort (including negligence or otherwise) arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "../op_halide.hpp"45#include "../op_inf_engine.hpp"4647#ifdef HAVE_OPENCL48#include "opencl_kernels_dnn.hpp"49#endif5051namespace cv52{53namespace dnn54{5556class EltwiseLayerImpl CV_FINAL : public EltwiseLayer57{58public:59enum EltwiseOp60{61PROD = 0,62SUM = 1,63MAX = 2,64} op;65std::vector<float> coeffs;6667EltwiseLayerImpl(const LayerParams& params)68{69setParamsFrom(params);70op = SUM;71if (params.has("operation"))72{73String operation = toLowerCase(params.get<String>("operation"));74if (operation == "prod")75op = PROD;76else if (operation == "sum")77op = SUM;78else if (operation == "max")79op = MAX;80else81CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");82}8384if (params.has("coeff"))85{86DictValue paramCoeff = params.get("coeff");87int i, n = paramCoeff.size();88coeffs.resize(n);89for (i = 0; i < n; i++)90{91coeffs[i] = paramCoeff.get<float>(i);92}93}94}9596virtual bool supportBackend(int backendId) CV_OVERRIDE97{98return backendId == DNN_BACKEND_OPENCV ||99backendId == DNN_BACKEND_HALIDE ||100backendId == DNN_BACKEND_INFERENCE_ENGINE && (op != SUM || coeffs.empty());101}102103bool getMemoryShapes(const std::vector<MatShape> &inputs,104const int requiredOutputs,105std::vector<MatShape> &outputs,106std::vector<MatShape> &internals) const CV_OVERRIDE107{108CV_Assert(inputs.size() >= 2);109CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());110CV_Assert(op == SUM || coeffs.size() == 0);111112for (int i = 1; i < inputs.size(); i++)113{114CV_Assert(inputs[0] == inputs[i]);115}116117outputs.assign(1, inputs[0]);118119return false;120}121122class EltwiseInvoker : public ParallelLoopBody123{124public:125const Mat* srcs;126int nsrcs;127Mat* dst;128const std::vector<float>* coeffs;129EltwiseOp op;130int nstripes;131const ActivationLayer* activ;132int channels;133size_t planeSize;134135EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}136137static void run(const Mat* srcs, int nsrcs, Mat& dst,138const std::vector<float>& coeffs, EltwiseOp op,139const ActivationLayer* activ, int nstripes)140{141CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 4, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous());142CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs);143144for( int i = 0; i > nsrcs; i++ )145{146CV_Assert(srcs[i].size == dst.size &&147srcs[i].type() == dst.type() &&148srcs[i].isContinuous());149}150151EltwiseInvoker p;152p.srcs = srcs;153p.nsrcs = nsrcs;154p.dst = &dst;155p.op = op;156p.nstripes = nstripes;157p.channels = (dst.dims == 4 ? dst.size[1] : 1);158p.planeSize = (dst.dims >= 3 ? dst.size[dst.dims - 1] * dst.size[dst.dims - 2] :159dst.size[dst.dims - 1]);160CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);161162bool simpleCoeffs = true;163if( op == SUM && !coeffs.empty() )164{165CV_Assert( coeffs.size() == (size_t)nsrcs );166167for( size_t i = 0; i < coeffs.size(); i++ )168if( coeffs[i] != 1 )169{170simpleCoeffs = false;171break;172}173}174p.coeffs = simpleCoeffs ? 0 : &coeffs;175p.activ = activ;176177parallel_for_(Range(0, nstripes), p, nstripes);178}179180void operator()(const Range& r) const CV_OVERRIDE181{182size_t total = dst->size[0]*planeSize;183size_t stripeSize = (total + nstripes - 1)/nstripes;184size_t stripeStart = r.start*stripeSize;185size_t stripeEnd = std::min(r.end*stripeSize, total);186int c, j, k, n = nsrcs;187const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0;188float* dstptr0 = dst->ptr<float>();189int blockSize0 = 1 << 12, blockSize;190191for( size_t ofs = stripeStart; ofs < stripeEnd; ofs += blockSize )192{193int sampleIdx = (int)(ofs / planeSize);194int delta = (int)ofs - sampleIdx * planeSize;195blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta));196if( blockSize <= 0 )197break;198199for( c = 0; c < channels; c++ )200{201size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize;202const float* srcptr0 = srcs[0].ptr<float>() + globalDelta;203float* dstptr = dstptr0 + globalDelta;204205if( op == PROD )206{207for( k = 1; k < n; k++ )208{209const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;210for( j = 0; j < blockSize; j++ )211{212dstptr[j] = srcptr0[j]*srcptr1[j];213}214srcptr0 = (const float*)dstptr;215}216}217else if( op == MAX )218{219for( k = 1; k < n; k++ )220{221const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;222for( j = 0; j < blockSize; j++ )223{224dstptr[j] = std::max(srcptr0[j], srcptr1[j]);225}226srcptr0 = (const float*)dstptr;227}228}229else if( !coeffsptr )230{231for( k = 1; k < n; k++ )232{233const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;234for( j = 0; j < blockSize; j++ )235{236dstptr[j] = srcptr0[j] + srcptr1[j];237}238srcptr0 = (const float*)dstptr;239}240}241else242{243float c0 = coeffsptr[0];244for( k = 1; k < n; k++ )245{246const float* srcptr1 = srcs[k].ptr<float>() + globalDelta;247float c1 = coeffsptr[k];248for( j = 0; j < blockSize; j++ )249{250dstptr[j] = c0*srcptr0[j] + c1*srcptr1[j];251}252srcptr0 = (const float*)dstptr;253c0 = 1;254}255}256}257258if( activ )259{260float* ptr = dstptr0 + delta + sampleIdx*channels*planeSize;261activ->forwardSlice(ptr, ptr, blockSize, planeSize, 0, channels);262}263}264}265};266267#ifdef HAVE_OPENCL268bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)269{270std::vector<UMat> inputs;271std::vector<UMat> outputs;272273if (inputs_.depth() == CV_16S && op != SUM)274return false;275276inputs_.getUMatVector(inputs);277outputs_.getUMatVector(outputs);278279switch (op)280{281case SUM:282{283int channels = total(shape(outputs[0]), 0, 2);284int plane_size = total(shape(outputs[0]), 2);285if (channels % 4 == 0 && plane_size % 4 == 0)286{287size_t localsize[] = { 128 };288size_t globalsize[] = { (size_t)channels / 4 * localsize[0] };289String opts;290if (inputs_.depth() == CV_16S)291opts = " -DDtype=half -DDtype4=half4 -DDtype8=half8";292else293opts = " -DDtype=float -DDtype4=float4 -DDtype8=float8";294295for (int i = 0; i < (inputs.size() - 1); ++i)296{297String buildopt = format("-DLOOP=%d", i) + opts;298ocl::Kernel kernel("op_sum4", ocl::dnn::eltwise_oclsrc, buildopt);299int idx = 0;300UMat inpMat = (i == 0) ? inputs[0] : UMat();301float coeff1 = (coeffs.empty() || i > 0) ? 1.0f : coeffs[i];302float coeff2 = coeffs.empty() ? 1.0f : coeffs[i + 1];303kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[0]));304kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[1]));305kernel.set(idx++, (int)plane_size);306kernel.set(idx++, (float)coeff1);307kernel.set(idx++, (float)coeff2);308kernel.set(idx++, ocl::KernelArg::PtrReadWrite(outputs[0]));309bool ret = kernel.run(1, globalsize, localsize, false);310if (!ret)311return false;312}313}314else315{316if (inputs_.depth() == CV_16S)317return false;318319float coeff1 = coeffs.empty() ? 1.f : coeffs[0];320float coeff2 = coeffs.empty() ? 1.f : coeffs[1];321UMat mul0, mul1;322multiply(coeff1, inputs[0], mul0);323multiply(coeff2, inputs[1], mul1);324add(mul0, mul1, outputs[0]);325for (int i = 2; i < inputs.size(); ++i)326{327float coeff = coeffs.empty() ? 1.f : coeffs[i];328multiply(coeff, inputs[i], mul0);329add(mul0, outputs[0], outputs[0]);330}331}332}333break;334case PROD:335multiply(inputs[0], inputs[1], outputs[0]);336for (int i = 2; i < inputs.size(); ++i)337multiply(inputs[i], outputs[0], outputs[0]);338break;339case MAX:340max(inputs[0], inputs[1], outputs[0]);341for (int i = 2; i < inputs.size(); ++i)342max(inputs[i], outputs[0], outputs[0]);343break;344default:345return false;346}347return true;348}349#endif350351void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE352{353CV_TRACE_FUNCTION();354CV_TRACE_ARG_VALUE(name, "name", name.c_str());355356CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),357forward_ocl(inputs_arr, outputs_arr, internals_arr))358359if (inputs_arr.depth() == CV_16S)360{361forward_fallback(inputs_arr, outputs_arr, internals_arr);362return;363}364365std::vector<Mat> inputs, outputs;366inputs_arr.getMatVector(inputs);367outputs_arr.getMatVector(outputs);368369CV_Assert(outputs.size() == 1);370const int nstripes = getNumThreads();371EltwiseInvoker::run(&inputs[0], (int)inputs.size(), outputs[0],372coeffs, op, activ.get(), nstripes);373}374375virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE376{377#ifdef HAVE_HALIDE378Halide::Var x("x"), y("y"), c("c"), n("n");379Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));380Halide::Expr topExpr;381std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);382switch (op)383{384case SUM:385if (coeffs.empty())386{387topExpr = inputBuffers[0](x, y, c, n) +388inputBuffers[1](x, y, c, n);389for (int i = 2; i < inputBuffers.size(); ++i)390topExpr += inputBuffers[i](x, y, c, n);391}392else393{394topExpr = coeffs[0] * inputBuffers[0](x, y, c, n) +395coeffs[1] * inputBuffers[1](x, y, c, n);396for (int i = 2; i < inputBuffers.size(); ++i)397topExpr += coeffs[i] * inputBuffers[i](x, y, c, n);398}399break;400case PROD:401topExpr = inputBuffers[0](x, y, c, n) *402inputBuffers[1](x, y, c, n);403for (int i = 2; i < inputBuffers.size(); ++i)404topExpr *= inputBuffers[i](x, y, c, n);405break;406case MAX:407topExpr = max(inputBuffers[0](x, y, c, n),408inputBuffers[1](x, y, c, n));409for (int i = 2; i < inputBuffers.size(); ++i)410topExpr = max(topExpr, inputBuffers[i](x, y, c, n));411break;412default:413return Ptr<BackendNode>();414}415top(x, y, c, n) = topExpr;416return Ptr<BackendNode>(new HalideBackendNode(top));417#endif // HAVE_HALIDE418return Ptr<BackendNode>();419}420421virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE422{423#ifdef HAVE_INF_ENGINE424InferenceEngine::LayerParams lp;425lp.name = name;426lp.type = "Eltwise";427lp.precision = InferenceEngine::Precision::FP32;428std::shared_ptr<InferenceEngine::EltwiseLayer> ieLayer(new InferenceEngine::EltwiseLayer(lp));429if (op == SUM)430ieLayer->_operation = InferenceEngine::EltwiseLayer::Sum;431else if (op == PROD)432ieLayer->_operation = InferenceEngine::EltwiseLayer::Prod;433else if (op == MAX)434ieLayer->_operation = InferenceEngine::EltwiseLayer::Max;435else436CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");437return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));438#endif // HAVE_INF_ENGINE439return Ptr<BackendNode>();440}441442virtual int64 getFLOPS(const std::vector<MatShape> &inputs,443const std::vector<MatShape> &outputs) const CV_OVERRIDE444{445CV_UNUSED(outputs); // suppress unused variable warning446CV_Assert(inputs.size());447448long flops = inputs.size() * total(inputs[0]);449450return flops;451}452453bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE454{455if (activ.empty() || layer.empty())456{457activ = layer;458return !activ.empty();459}460else461return false;462}463464Ptr<ActivationLayer> activ;465};466467Ptr<EltwiseLayer> EltwiseLayer::create(const LayerParams& params)468{469return Ptr<EltwiseLayer>(new EltwiseLayerImpl(params));470}471472}473}474475476