Path: blob/master/modules/dnn/src/layers/prior_box_layer.cpp
16337 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2013, OpenCV Foundation, all rights reserved.13// Copyright (C) 2017, Intel Corporation, all rights reserved.14// Third party copyrights are property of their respective owners.15//16// Redistribution and use in source and binary forms, with or without modification,17// are permitted provided that the following conditions are met:18//19// * Redistribution's of source code must retain the above copyright notice,20// this list of conditions and the following disclaimer.21//22// * Redistribution's in binary form must reproduce the above copyright notice,23// this list of conditions and the following disclaimer in the documentation24// and/or other materials provided with the distribution.25//26// * The name of the copyright holders may not be used to endorse or promote products27// derived from this software without specific prior written permission.28//29// This software is provided by the copyright holders and contributors "as is" and30// any express or implied warranties, including, but not limited to, the implied31// warranties of merchantability and fitness for a particular purpose are disclaimed.32// In no event shall the Intel Corporation or contributors be liable for any direct,33// indirect, incidental, special, exemplary, or consequential damages34// (including, but not limited to, procurement of substitute goods or services;35// loss of use, data, or profits; or business interruption) however caused36// and on any theory of liability, whether in contract, strict liability,37// or tort (including negligence or otherwise) arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "../op_inf_engine.hpp"45#include "../op_vkcom.hpp"46#include <float.h>47#include <algorithm>48#include <cmath>4950#ifdef HAVE_OPENCL51#include "opencl_kernels_dnn.hpp"52#endif5354namespace cv55{56namespace dnn57{5859class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer60{61public:62static bool getParameterDict(const LayerParams ¶ms,63const std::string ¶meterName,64DictValue& result)65{66if (!params.has(parameterName))67{68return false;69}7071result = params.get(parameterName);72return true;73}7475template<typename T>76T getParameter(const LayerParams ¶ms,77const std::string ¶meterName,78const size_t &idx=0,79const bool required=true,80const T& defaultValue=T())81{82DictValue dictValue;83bool success = getParameterDict(params, parameterName, dictValue);84if(!success)85{86if(required)87{88std::string message = _layerName;89message += " layer parameter does not contain ";90message += parameterName;91message += " parameter.";92CV_Error(Error::StsBadArg, message);93}94else95{96return defaultValue;97}98}99return dictValue.get<T>(idx);100}101102void getAspectRatios(const LayerParams ¶ms)103{104DictValue aspectRatioParameter;105bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);106if (!aspectRatioRetieved)107return;108109for (int i = 0; i < aspectRatioParameter.size(); ++i)110{111float aspectRatio = aspectRatioParameter.get<float>(i);112bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;113114for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)115{116alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;117}118if (!alreadyExists)119{120_aspectRatios.push_back(aspectRatio);121if (_flip)122{123_aspectRatios.push_back(1./aspectRatio);124}125}126}127}128129static void getParams(const std::string& name, const LayerParams ¶ms,130std::vector<float>* values)131{132DictValue dict;133if (getParameterDict(params, name, dict))134{135values->resize(dict.size());136for (int i = 0; i < dict.size(); ++i)137{138(*values)[i] = dict.get<float>(i);139}140}141else142values->clear();143}144145void getVariance(const LayerParams ¶ms)146{147DictValue varianceParameter;148bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);149CV_Assert(varianceParameterRetrieved);150151int varianceSize = varianceParameter.size();152if (varianceSize > 1)153{154// Must and only provide 4 variance.155CV_Assert(varianceSize == 4);156157for (int i = 0; i < varianceSize; ++i)158{159float variance = varianceParameter.get<float>(i);160CV_Assert(variance > 0);161_variance.push_back(variance);162}163}164else165{166if (varianceSize == 1)167{168float variance = varianceParameter.get<float>(0);169CV_Assert(variance > 0);170_variance.push_back(variance);171}172else173{174// Set default to 0.1.175_variance.push_back(0.1f);176}177}178}179180PriorBoxLayerImpl(const LayerParams ¶ms)181{182setParamsFrom(params);183_minSize = getParameter<float>(params, "min_size", 0, false, 0);184_flip = getParameter<bool>(params, "flip", 0, false, true);185_clip = getParameter<bool>(params, "clip", 0, false, true);186_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);187188_aspectRatios.clear();189190getAspectRatios(params);191getVariance(params);192193_maxSize = -1;194if (params.has("max_size"))195{196_maxSize = params.get("max_size").get<float>(0);197CV_Assert(_maxSize > _minSize);198}199200std::vector<float> widths, heights;201getParams("width", params, &widths);202getParams("height", params, &heights);203_explicitSizes = !widths.empty();204CV_Assert(widths.size() == heights.size());205206if (_explicitSizes)207{208CV_Assert(_aspectRatios.empty());209CV_Assert(!params.has("min_size"));210CV_Assert(!params.has("max_size"));211_boxWidths = widths;212_boxHeights = heights;213}214else215{216CV_Assert(_minSize > 0);217_boxWidths.resize(1 + (_maxSize > 0 ? 1 : 0) + _aspectRatios.size());218_boxHeights.resize(_boxWidths.size());219_boxWidths[0] = _boxHeights[0] = _minSize;220221int i = 1;222if (_maxSize > 0)223{224// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)225_boxWidths[i] = _boxHeights[i] = sqrt(_minSize * _maxSize);226i += 1;227}228229// rest of priors230for (size_t r = 0; r < _aspectRatios.size(); ++r)231{232float arSqrt = sqrt(_aspectRatios[r]);233_boxWidths[i + r] = _minSize * arSqrt;234_boxHeights[i + r] = _minSize / arSqrt;235}236}237CV_Assert(_boxWidths.size() == _boxHeights.size());238_numPriors = _boxWidths.size();239240if (params.has("step_h") || params.has("step_w")) {241CV_Assert(!params.has("step"));242_stepY = getParameter<float>(params, "step_h");243CV_Assert(_stepY > 0.);244_stepX = getParameter<float>(params, "step_w");245CV_Assert(_stepX > 0.);246} else if (params.has("step")) {247const float step = getParameter<float>(params, "step");248CV_Assert(step > 0);249_stepY = step;250_stepX = step;251} else {252_stepY = 0;253_stepX = 0;254}255if (params.has("offset_h") || params.has("offset_w"))256{257CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));258getParams("offset_h", params, &_offsetsY);259getParams("offset_w", params, &_offsetsX);260CV_Assert(_offsetsX.size() == _offsetsY.size());261_numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));262}263else264{265float offset = getParameter<float>(params, "offset", 0, false, 0.5);266_offsetsX.assign(1, offset);267_offsetsY.assign(1, offset);268}269}270271virtual bool supportBackend(int backendId) CV_OVERRIDE272{273return backendId == DNN_BACKEND_OPENCV ||274backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() ||275backendId == DNN_BACKEND_VKCOM && haveVulkan();276}277278bool getMemoryShapes(const std::vector<MatShape> &inputs,279const int requiredOutputs,280std::vector<MatShape> &outputs,281std::vector<MatShape> &internals) const CV_OVERRIDE282{283CV_Assert(!inputs.empty());284285int layerHeight = inputs[0][2];286int layerWidth = inputs[0][3];287288// Since all images in a batch has same height and width, we only need to289// generate one set of priors which can be shared across all images.290size_t outNum = 1;291// 2 channels. First channel stores the mean of each prior coordinate.292// Second channel stores the variance of each prior coordinate.293size_t outChannels = 2;294295outputs.resize(1, shape(outNum, outChannels,296layerHeight * layerWidth * _numPriors * 4));297298return false;299}300301void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE302{303std::vector<Mat> inputs;304inputs_arr.getMatVector(inputs);305306CV_CheckGT(inputs.size(), (size_t)1, "");307CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");308int layerWidth = inputs[0].size[3];309int layerHeight = inputs[0].size[2];310311int imageWidth = inputs[1].size[3];312int imageHeight = inputs[1].size[2];313314_stepY = _stepY == 0 ? (static_cast<float>(imageHeight) / layerHeight) : _stepY;315_stepX = _stepX == 0 ? (static_cast<float>(imageWidth) / layerWidth) : _stepX;316}317318#ifdef HAVE_OPENCL319bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)320{321std::vector<UMat> inputs;322std::vector<UMat> outputs;323324bool use_half = (inps.depth() == CV_16S);325inps.getUMatVector(inputs);326outs.getUMatVector(outputs);327328int _layerWidth = inputs[0].size[3];329int _layerHeight = inputs[0].size[2];330331int _imageWidth = inputs[1].size[3];332int _imageHeight = inputs[1].size[2];333334if (umat_offsetsX.empty())335{336Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);337Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);338Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);339Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);340Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);341342offsetsX.copyTo(umat_offsetsX);343offsetsY.copyTo(umat_offsetsY);344variance.copyTo(umat_variance);345widths.copyTo(umat_widths);346heights.copyTo(umat_heights);347}348349String opts;350if (use_half)351opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";352else353opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";354355size_t nthreads = _layerHeight * _layerWidth;356ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);357358kernel.set(0, (int)nthreads);359kernel.set(1, (float)_stepX);360kernel.set(2, (float)_stepY);361kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));362kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));363kernel.set(5, (int)_offsetsX.size());364kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));365kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));366kernel.set(8, (int)_boxWidths.size());367kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));368kernel.set(10, (int)_layerHeight);369kernel.set(11, (int)_layerWidth);370kernel.set(12, (int)_imageHeight);371kernel.set(13, (int)_imageWidth);372kernel.run(1, &nthreads, NULL, false);373374// clip the prior's coordinate such that it is within [0, 1]375if (_clip)376{377ocl::Kernel kernel("clip", ocl::dnn::prior_box_oclsrc, opts);378size_t nthreads = _layerHeight * _layerWidth * _numPriors * 4;379if (!kernel.args((int)nthreads, ocl::KernelArg::PtrReadWrite(outputs[0]))380.run(1, &nthreads, NULL, false))381return false;382}383384// set the variance.385{386ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);387int offset = total(shape(outputs[0]), 2);388size_t nthreads = _layerHeight * _layerWidth * _numPriors;389kernel.set(0, (int)nthreads);390kernel.set(1, (int)offset);391kernel.set(2, (int)_variance.size());392kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));393kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));394if (!kernel.run(1, &nthreads, NULL, false))395return false;396}397return true;398}399#endif400401void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE402{403CV_TRACE_FUNCTION();404CV_TRACE_ARG_VALUE(name, "name", name.c_str());405406CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),407forward_ocl(inputs_arr, outputs_arr, internals_arr))408409if (inputs_arr.depth() == CV_16S)410{411forward_fallback(inputs_arr, outputs_arr, internals_arr);412return;413}414415std::vector<Mat> inputs, outputs;416inputs_arr.getMatVector(inputs);417outputs_arr.getMatVector(outputs);418419CV_Assert(inputs.size() == 2);420421int _layerWidth = inputs[0].size[3];422int _layerHeight = inputs[0].size[2];423424int _imageWidth = inputs[1].size[3];425int _imageHeight = inputs[1].size[2];426427float* outputPtr = outputs[0].ptr<float>();428float _boxWidth, _boxHeight;429for (size_t h = 0; h < _layerHeight; ++h)430{431for (size_t w = 0; w < _layerWidth; ++w)432{433for (size_t i = 0; i < _boxWidths.size(); ++i)434{435_boxWidth = _boxWidths[i];436_boxHeight = _boxHeights[i];437for (int j = 0; j < _offsetsX.size(); ++j)438{439float center_x = (w + _offsetsX[j]) * _stepX;440float center_y = (h + _offsetsY[j]) * _stepY;441outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,442_imageHeight, _bboxesNormalized, outputPtr);443}444}445}446}447// clip the prior's coordinate such that it is within [0, 1]448if (_clip)449{450int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;451outputPtr = outputs[0].ptr<float>();452for (size_t d = 0; d < _outChannelSize; ++d)453{454outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);455}456}457// set the variance.458outputPtr = outputs[0].ptr<float>(0, 1);459if(_variance.size() == 1)460{461Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);462secondChannel.setTo(Scalar::all(_variance[0]));463}464else465{466int count = 0;467for (size_t h = 0; h < _layerHeight; ++h)468{469for (size_t w = 0; w < _layerWidth; ++w)470{471for (size_t i = 0; i < _numPriors; ++i)472{473for (int j = 0; j < 4; ++j)474{475outputPtr[count] = _variance[j];476++count;477}478}479}480}481}482}483484virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE485{486#ifdef HAVE_VULKAN487std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,488_clip, _numPriors,489_variance, _offsetsX,490_offsetsY, _boxWidths,491_boxHeights));492return Ptr<BackendNode>(new VkComBackendNode(input, op));493#endif // HAVE_VULKAN494return Ptr<BackendNode>();495}496497virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE498{499#ifdef HAVE_INF_ENGINE500InferenceEngine::LayerParams lp;501lp.name = name;502lp.type = _explicitSizes ? "PriorBoxClustered" : "PriorBox";503lp.precision = InferenceEngine::Precision::FP32;504std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));505506if (_explicitSizes)507{508CV_Assert(!_boxWidths.empty()); CV_Assert(!_boxHeights.empty());509CV_Assert(_boxWidths.size() == _boxHeights.size());510ieLayer->params["width"] = format("%f", _boxWidths[0]);511ieLayer->params["height"] = format("%f", _boxHeights[0]);512for (int i = 1; i < _boxWidths.size(); ++i)513{514ieLayer->params["width"] += format(",%f", _boxWidths[i]);515ieLayer->params["height"] += format(",%f", _boxHeights[i]);516}517}518else519{520ieLayer->params["min_size"] = format("%f", _minSize);521ieLayer->params["max_size"] = _maxSize > 0 ? format("%f", _maxSize) : "";522523if (!_aspectRatios.empty())524{525ieLayer->params["aspect_ratio"] = format("%f", _aspectRatios[0]);526for (int i = 1; i < _aspectRatios.size(); ++i)527ieLayer->params["aspect_ratio"] += format(",%f", _aspectRatios[i]);528}529}530531ieLayer->params["flip"] = "0"; // We already flipped aspect ratios.532ieLayer->params["clip"] = _clip ? "1" : "0";533534CV_Assert(!_variance.empty());535ieLayer->params["variance"] = format("%f", _variance[0]);536for (int i = 1; i < _variance.size(); ++i)537ieLayer->params["variance"] += format(",%f", _variance[i]);538539if (_stepX == _stepY)540{541ieLayer->params["step"] = format("%f", _stepX);542ieLayer->params["step_h"] = "0.0";543ieLayer->params["step_w"] = "0.0";544}545else546{547ieLayer->params["step"] = "0.0";548ieLayer->params["step_h"] = format("%f", _stepY);549ieLayer->params["step_w"] = format("%f", _stepX);550}551CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");552ieLayer->params["offset"] = format("%f", _offsetsX[0]);553554return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));555#endif // HAVE_INF_ENGINE556return Ptr<BackendNode>();557}558559virtual int64 getFLOPS(const std::vector<MatShape> &inputs,560const std::vector<MatShape> &outputs) const CV_OVERRIDE561{562CV_UNUSED(outputs); // suppress unused variable warning563long flops = 0;564565for (int i = 0; i < inputs.size(); i++)566{567flops += total(inputs[i], 2) * _numPriors * 4;568}569570return flops;571}572573private:574float _minSize;575float _maxSize;576577float _stepX, _stepY;578579std::vector<float> _aspectRatios;580std::vector<float> _variance;581std::vector<float> _offsetsX;582std::vector<float> _offsetsY;583// Precomputed final widths and heights based on aspect ratios or explicit sizes.584std::vector<float> _boxWidths;585std::vector<float> _boxHeights;586587#ifdef HAVE_OPENCL588UMat umat_offsetsX;589UMat umat_offsetsY;590UMat umat_widths;591UMat umat_heights;592UMat umat_variance;593#endif594595bool _flip;596bool _clip;597bool _explicitSizes;598bool _bboxesNormalized;599600size_t _numPriors;601602static const size_t _numAxes = 4;603static const std::string _layerName;604605static float* addPrior(float center_x, float center_y, float width, float height,606float imgWidth, float imgHeight, bool normalized, float* dst)607{608if (normalized)609{610dst[0] = (center_x - width * 0.5f) / imgWidth; // xmin611dst[1] = (center_y - height * 0.5f) / imgHeight; // ymin612dst[2] = (center_x + width * 0.5f) / imgWidth; // xmax613dst[3] = (center_y + height * 0.5f) / imgHeight; // ymax614}615else616{617dst[0] = center_x - width * 0.5f; // xmin618dst[1] = center_y - height * 0.5f; // ymin619dst[2] = center_x + width * 0.5f - 1.0f; // xmax620dst[3] = center_y + height * 0.5f - 1.0f; // ymax621}622return dst + 4;623}624};625626const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");627628Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams ¶ms)629{630return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));631}632633}634}635636637