Path: blob/master/modules/dnn/src/layers/softmax_layer.cpp
16337 views
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise)
arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "../op_halide.hpp"45#include "../op_inf_engine.hpp"46#include "../op_vkcom.hpp"47#include <algorithm>48#include <stdlib.h>49using std::max;5051#ifdef HAVE_OPENCL52#include "opencl_kernels_dnn.hpp"53using namespace cv::dnn::ocl4dnn;54#endif5556namespace cv57{58namespace dnn59{6061class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer62{63public:6465SoftMaxLayerImpl(const LayerParams& params)66{67axisRaw = params.get<int>("axis", 1);68logSoftMax = params.get<bool>("log_softmax", false);69setParamsFrom(params);70}7172#ifdef HAVE_OPENCL73Ptr<OCL4DNNSoftmax<float> > softmaxOp;74#endif7576bool getMemoryShapes(const std::vector<MatShape> &inputs,77const int requiredOutputs,78std::vector<MatShape> &outputs,79std::vector<MatShape> &internals) const CV_OVERRIDE80{81bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);82MatShape shape = inputs[0];83int cAxis = clamp(axisRaw, shape.size());84shape[cAxis] = 1;85internals.assign(1, shape);86return inplace;87}8889virtual bool supportBackend(int backendId) CV_OVERRIDE90{91return backendId == DNN_BACKEND_OPENCV ||92backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1 ||93backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !logSoftMax ||94backendId == DNN_BACKEND_VKCOM && haveVulkan();95}9697#ifdef HAVE_OPENCL98virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE99{100softmaxOp.release();101}102103bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)104{105std::vector<UMat> inputs;106std::vector<UMat> outputs;107std::vector<UMat> internals;108109bool use_half = (inputs_.depth() == CV_16S);110inputs_.getUMatVector(inputs);111outputs_.getUMatVector(outputs);112internals_.getUMatVector(internals);113114UMat& 
src = inputs[0];115UMat& dstMat = outputs[0];116int axis = clamp(axisRaw, src.dims);117118if (softmaxOp.empty())119{120OCL4DNNSoftmaxConfig config;121config.in_shape = shape(inputs[0]);122config.axis = axis;123config.channels = inputs[0].size[axis];124config.logsoftmax = logSoftMax;125config.use_half = use_half;126127softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));128}129130if (softmaxOp->Forward(src, dstMat))131return true;132133UMat& bufMat = internals[0];134MatShape s = shape(src);135size_t outerSize = total(s, 0, axis);136size_t channels = src.size[axis];137size_t innerSize = total(s, axis + 1);138139String buildOpts = format("-DT=%s", use_half ? "half" : "float");140ocl::Kernel kmax, ksub, ksum, kdiv;141142if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))143return false;144145if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))146return false;147148if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))149return false;150151if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";152if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))153return false;154155size_t bufSize = internals[0].total();156size_t totalSize = src.total();157158size_t internal_globalSize[1] = { bufSize };159size_t total_globalSize[1] = { totalSize };160161kmax.args((int)outerSize, (int)channels, (int)innerSize,162ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));163if (!kmax.run(1, internal_globalSize, NULL, false))164return false;165166ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,167ocl::KernelArg::PtrReadOnly(bufMat),168ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));169if (!ksub.run(1, total_globalSize, NULL, false))170return false;171172ksum.args((int)outerSize, (int)channels, (int)innerSize,173ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));174if (!ksum.run(1, 
internal_globalSize, NULL, false))175return false;176177kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,178ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));179if (!kdiv.run(1, total_globalSize, NULL, false))180return false;181182return true;183}184#endif185186void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE187{188CV_TRACE_FUNCTION();189CV_TRACE_ARG_VALUE(name, "name", name.c_str());190191CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),192forward_ocl(inputs_arr, outputs_arr, internals_arr))193194if (inputs_arr.depth() == CV_16S)195{196forward_fallback(inputs_arr, outputs_arr, internals_arr);197return;198}199200std::vector<Mat> inputs, outputs, internals;201inputs_arr.getMatVector(inputs);202outputs_arr.getMatVector(outputs);203internals_arr.getMatVector(internals);204205const Mat &src = inputs[0];206Mat &dst = outputs[0];207208int axis = clamp(axisRaw, src.dims);209size_t outerSize = src.total(0, axis), channels = src.size[axis],210innerSize = src.total(axis + 1);211212CV_Assert(src.type() == CV_32F);213CV_Assert(src.isContinuous() && dst.isContinuous());214215const float *srcPtr = src.ptr<float>();216float *dstPtr = dst.ptr<float>();217float *bufPtr = internals[0].ptr<float>();218219size_t outerStep = src.total(axis);220size_t cnStep = src.total(axis + 1);221222//compute max along axis223for (size_t outerDim = 0; outerDim < outerSize; outerDim++)224{225size_t srcOffset = outerDim * outerStep;226size_t bufOffset = outerDim * cnStep;227228memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));229230for (size_t cnDim = 1; cnDim < channels; cnDim++)231{232for (size_t i = 0; i < innerSize; i++)233bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);234}235}236237//subtract max238for (size_t outerDim = 0; outerDim < outerSize; outerDim++)239{240size_t srcOffset = outerDim * 
outerStep;241size_t bufOffset = outerDim * cnStep;242243for (size_t cnDim = 0; cnDim < channels; cnDim++)244{245const int offset = srcOffset + cnDim * cnStep;246for (size_t i = 0; i < innerSize; i++)247dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];248}249}250251cv::exp(dst, dst);252253for (size_t outerDim = 0; outerDim < outerSize; outerDim++)254{255size_t srcOffset = outerDim * outerStep;256size_t bufOffset = outerDim * cnStep;257258//sum exp along axis259for (size_t i = 0; i < innerSize; i++)260bufPtr[bufOffset + i] = 0.f;261262for (size_t cnDim = 0; cnDim < channels; cnDim++)263{264const int offset = srcOffset + cnDim * cnStep;265for (size_t i = 0; i < innerSize; i++)266bufPtr[bufOffset + i] += dstPtr[offset + i];267}268269//divide by computed sum270for (size_t cnDim = 0; cnDim < channels; cnDim++)271{272const int offset = srcOffset + cnDim * cnStep;273for (size_t i = 0; i < innerSize; i++)274dstPtr[offset + i] /= bufPtr[bufOffset + i];275}276if (logSoftMax)277{278for (size_t cnDim = 0; cnDim < channels; cnDim++)279{280const int offset = srcOffset + cnDim * cnStep;281for (size_t i = 0; i < innerSize; i++)282dstPtr[offset + i] = log(dstPtr[offset + i]);283}284}285}286}287288virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE289{290#ifdef HAVE_VULKAN291vkcom::Tensor in = VkComTensor(inputs[0]);292int cAxis = clamp(axisRaw, in.dimNum());293std::shared_ptr<vkcom::OpBase> op(new vkcom::OpSoftmax(cAxis, logSoftMax));294return Ptr<BackendNode>(new VkComBackendNode(inputs, op));295#endif // HAVE_VULKAN296return Ptr<BackendNode>();297}298299300virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE301{302#ifdef HAVE_HALIDE303Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);304int inW, inH, inC, inN;305getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);306307if (inW != 1 || inH != 1)308CV_Error(cv::Error::StsNotImplemented,309"Halide backend for SoftMax 
with spatial size "310"more than 1x1 is not implemented");311312Halide::Var x("x"), y("y"), c("c"), n("n");313Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));314315Halide::Func expInput("expInput");316Halide::RDom r(0, inW, 0, inH, 0, inC);317expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));318Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));319top(x, y, c, n) = expInput(x, y, c, n) / globalSum;320return Ptr<BackendNode>(new HalideBackendNode(top));321#endif // HAVE_HALIDE322return Ptr<BackendNode>();323}324325virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE326{327#ifdef HAVE_INF_ENGINE328InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);329330InferenceEngine::LayerParams lp;331lp.name = name;332lp.type = "SoftMax";333lp.precision = InferenceEngine::Precision::FP32;334std::shared_ptr<InferenceEngine::SoftMaxLayer> ieLayer(new InferenceEngine::SoftMaxLayer(lp));335ieLayer->axis = clamp(axisRaw, input->dims.size());336return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));337#endif // HAVE_INF_ENGINE338return Ptr<BackendNode>();339}340341int64 getFLOPS(const std::vector<MatShape> &inputs,342const std::vector<MatShape> &outputs) const CV_OVERRIDE343{344CV_UNUSED(outputs); // suppress unused variable warning345int64 flops = 0;346347for (int i = 0; i < inputs.size(); i++)348{349flops += 4*total(inputs[i]);350}351352return flops;353}354355int axisRaw;356};357358Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)359{360return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));361}362363}364}365366367