Path: blob/master/modules/dnn/src/layers/region_layer.cpp
16337 views
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) 
arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include <opencv2/dnn/shape_utils.hpp>44#include <opencv2/dnn/all_layers.hpp>45#include "../nms.inl.hpp"4647#ifdef HAVE_OPENCL48#include "opencl_kernels_dnn.hpp"49#endif5051namespace cv52{53namespace dnn54{5556class RegionLayerImpl CV_FINAL : public RegionLayer57{58public:59int coords, classes, anchors, classfix;60float thresh, nmsThreshold;61bool useSoftmax, useLogistic;62#ifdef HAVE_OPENCL63UMat blob_umat;64#endif6566RegionLayerImpl(const LayerParams& params)67{68setParamsFrom(params);69CV_Assert(blobs.size() == 1);7071thresh = params.get<float>("thresh", 0.2);72coords = params.get<int>("coords", 4);73classes = params.get<int>("classes", 0);74anchors = params.get<int>("anchors", 5);75classfix = params.get<int>("classfix", 0);76useSoftmax = params.get<bool>("softmax", false);77useLogistic = params.get<bool>("logistic", false);78nmsThreshold = params.get<float>("nms_threshold", 0.4);7980CV_Assert(nmsThreshold >= 0.);81CV_Assert(coords == 4);82CV_Assert(classes >= 1);83CV_Assert(anchors >= 1);84CV_Assert(useLogistic || useSoftmax);85if (params.get<bool>("softmax_tree", false))86CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");87}8889bool getMemoryShapes(const std::vector<MatShape> &inputs,90const int requiredOutputs,91std::vector<MatShape> &outputs,92std::vector<MatShape> &internals) const CV_OVERRIDE93{94CV_Assert(inputs.size() > 0);95// channels == cell_size*anchors96CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);97int batch_size = inputs[0][0];98if(batch_size > 1)99outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));100else101outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));102return false;103}104105float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); 
}106107void softmax_activate(const float* input, const int n, const float temp, float* output)108{109int i;110float sum = 0;111float largest = -FLT_MAX;112for (i = 0; i < n; ++i) {113if (input[i] > largest) largest = input[i];114}115for (i = 0; i < n; ++i) {116float e = exp((input[i] - largest) / temp);117sum += e;118output[i] = e;119}120for (i = 0; i < n; ++i) {121output[i] /= sum;122}123}124125#ifdef HAVE_OPENCL126bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)127{128if (blob_umat.empty())129blobs[0].copyTo(blob_umat);130131std::vector<UMat> inputs;132std::vector<UMat> outputs;133134// TODO: implement a logistic activation to classification scores.135if (useLogistic || inps.depth() == CV_16S)136return false;137138inps.getUMatVector(inputs);139outs.getUMatVector(outputs);140141CV_Assert(inputs.size() >= 1);142int const cell_size = classes + coords + 1;143144for (size_t ii = 0; ii < outputs.size(); ii++)145{146UMat& inpBlob = inputs[ii];147UMat& outBlob = outputs[ii];148149int batch_size = inpBlob.size[0];150int rows = inpBlob.size[1];151int cols = inpBlob.size[2];152153// channels == cell_size*anchors, see l. 
94154int sample_size = cell_size*rows*cols*anchors;155156ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);157size_t nanchors = rows*cols*anchors*batch_size;158logistic_kernel.set(0, (int)nanchors);159logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));160logistic_kernel.set(2, (int)cell_size);161logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));162logistic_kernel.run(1, &nanchors, NULL, false);163164if (useSoftmax)165{166// Yolo v2167// softmax activation for Probability, for each grid cell (X x Y x Anchor-index)168ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);169size_t nanchors = rows*cols*anchors*batch_size;170softmax_kernel.set(0, (int)nanchors);171softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));172softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));173softmax_kernel.set(3, (int)cell_size);174softmax_kernel.set(4, (int)classes);175softmax_kernel.set(5, (int)classfix);176softmax_kernel.set(6, (int)rows);177softmax_kernel.set(7, (int)cols);178softmax_kernel.set(8, (int)anchors);179softmax_kernel.set(9, (float)thresh);180softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));181if (!softmax_kernel.run(1, &nanchors, NULL, false))182return false;183}184185if (nmsThreshold > 0) {186Mat mat = outBlob.getMat(ACCESS_WRITE);187float *dstData = mat.ptr<float>();188for (int b = 0; b < batch_size; ++b)189do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);190}191192}193194return true;195}196#endif197198void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE199{200CV_TRACE_FUNCTION();201CV_TRACE_ARG_VALUE(name, "name", name.c_str());202203CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),204forward_ocl(inputs_arr, outputs_arr, internals_arr))205206if (inputs_arr.depth() == CV_16S)207{208forward_fallback(inputs_arr, outputs_arr, internals_arr);209return;210}211212std::vector<Mat> inputs, 
outputs, internals;213inputs_arr.getMatVector(inputs);214outputs_arr.getMatVector(outputs);215internals_arr.getMatVector(internals);216217CV_Assert(inputs.size() >= 1);218CV_Assert(outputs.size() == 1);219int const cell_size = classes + coords + 1;220221const float* biasData = blobs[0].ptr<float>();222223for (size_t ii = 0; ii < outputs.size(); ii++)224{225Mat &inpBlob = inputs[ii];226Mat &outBlob = outputs[ii];227228int batch_size = inpBlob.size[0];229int rows = inpBlob.size[1];230int cols = inpBlob.size[2];231232// address length for one image in batch, both for input and output233int sample_size = cell_size*rows*cols*anchors;234235// assert that the comment above is true236CV_Assert(sample_size*batch_size == inpBlob.total());237CV_Assert(sample_size*batch_size == outBlob.total());238239CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);240int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;241int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;242243const float *srcData = inpBlob.ptr<float>();244float *dstData = outBlob.ptr<float>();245246// logistic activation for t0, for each grid cell (X x Y x Anchor-index)247for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {248int index = cell_size*i;249float x = srcData[index + 4];250dstData[index + 4] = logistic_activate(x); // logistic activation251}252253if (useSoftmax) { // Yolo v2254for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {255int index = cell_size*i;256softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);257}258}259else if (useLogistic) { // Yolo v3260for (int i = 0; i < batch_size*rows*cols*anchors; ++i){261int index = cell_size*i;262const float* input = srcData + index + 5;263float* output = dstData + index + 5;264for (int c = 0; c < classes; ++c)265output[c] = logistic_activate(input[c]);266}267}268for (int b = 0; b < batch_size; ++b)269for (int x = 0; x < cols; ++x)270for(int y = 0; y < rows; ++y)271for (int a = 0; a < anchors; ++a) {272// relative start address for 
image b within the batch data273int index_sample_offset = sample_size*b;274int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor275int p_index = index_sample_offset + index * cell_size + 4;276float scale = dstData[p_index];277if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;278int box_index = index_sample_offset + index * cell_size;279280dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;281dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;282dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / hNorm;283dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / wNorm;284285int class_index = index_sample_offset + index * cell_size + 5;286for (int j = 0; j < classes; ++j) {287float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability288dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;289}290}291if (nmsThreshold > 0) {292for (int b = 0; b < batch_size; ++b){293do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);294}295}296}297}298299void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)300{301std::vector<Rect2d> boxes(total);302std::vector<float> scores(total);303304for (int i = 0; i < total; ++i)305{306Rect2d &b = boxes[i];307int box_index = i * (classes + coords + 1);308b.width = detections[box_index + 2];309b.height = detections[box_index + 3];310b.x = detections[box_index + 0] - b.width / 2;311b.y = detections[box_index + 1] - b.height / 2;312}313314std::vector<int> indices;315for (int k = 0; k < classes; ++k)316{317for (int i = 0; i < total; ++i)318{319int box_index = i * (classes + coords + 1);320int class_index = box_index + 5;321scores[i] = detections[class_index + k];322detections[class_index + k] = 0;323}324NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);325for (int i = 0, n = 
indices.size(); i < n; ++i)326{327int box_index = indices[i] * (classes + coords + 1);328int class_index = box_index + 5;329detections[class_index + k] = scores[indices[i]];330}331}332}333334virtual int64 getFLOPS(const std::vector<MatShape> &inputs,335const std::vector<MatShape> &outputs) const CV_OVERRIDE336{337CV_UNUSED(outputs); // suppress unused variable warning338339int64 flops = 0;340for(int i = 0; i < inputs.size(); i++)341{342flops += 60*total(inputs[i]);343}344return flops;345}346};347348Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)349{350return Ptr<RegionLayer>(new RegionLayerImpl(params));351}352353} // namespace dnn354} // namespace cv355356357