Path: blob/master/modules/dnn/src/layers/proposal_layer.cpp
16337 views
// This file is part of OpenCV project.1// It is subject to the license terms in the LICENSE file found in the top-level directory2// of this distribution and at http://opencv.org/license.html.34// Copyright (C) 2017, Intel Corporation, all rights reserved.5// Third party copyrights are property of their respective owners.6#include "../precomp.hpp"7#include "layers_common.hpp"8#include "../op_inf_engine.hpp"910namespace cv { namespace dnn {1112class ProposalLayerImpl CV_FINAL : public ProposalLayer13{14public:15ProposalLayerImpl(const LayerParams& params)16{17setParamsFrom(params);1819featStride = params.get<uint32_t>("feat_stride", 16);20baseSize = params.get<uint32_t>("base_size", 16);21// uint32_t minSize = params.get<uint32_t>("min_size", 16);22keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);23keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);24nmsThreshold = params.get<float>("nms_thresh", 0.7);25ratios = params.get("ratio");26scales = params.get("scale");2728{29LayerParams lp;30lp.set("step", featStride);31lp.set("flip", false);32lp.set("clip", false);33lp.set("normalized_bbox", false);34lp.set("offset", 0.5 * baseSize / featStride);3536// Unused values.37float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};38lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));3940// Compute widths and heights explicitly.41std::vector<float> widths, heights;42widths.reserve(ratios.size() * scales.size());43heights.reserve(ratios.size() * scales.size());44for (int i = 0; i < ratios.size(); ++i)45{46float ratio = ratios.get<float>(i);47for (int j = 0; j < scales.size(); ++j)48{49float scale = scales.get<float>(j);50float width = std::floor(baseSize / sqrt(ratio) + 0.5f);51float height = std::floor(width * ratio + 0.5f);52widths.push_back(scale * width);53heights.push_back(scale * height);54}55}56lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));57lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));5859priorBoxLayer = PriorBoxLayer::create(lp);60}61{62int order[] = {0, 2, 3, 1};63LayerParams lp;64lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));6566deltasPermute = PermuteLayer::create(lp);67scoresPermute = PermuteLayer::create(lp);68}69{70LayerParams lp;71lp.set("code_type", "CENTER_SIZE");72lp.set("num_classes", 1);73lp.set("share_location", true);74lp.set("background_label_id", 1); // We won't pass background scores so set it out of range [0, num_classes)75lp.set("variance_encoded_in_target", true);76lp.set("keep_top_k", keepTopAfterNMS);77lp.set("top_k", keepTopBeforeNMS);78lp.set("nms_threshold", nmsThreshold);79lp.set("normalized_bbox", false);80lp.set("clip", true);8182detectionOutputLayer = DetectionOutputLayer::create(lp);83}84}8586virtual bool supportBackend(int backendId) CV_OVERRIDE87{88return backendId == DNN_BACKEND_OPENCV ||89backendId == DNN_BACKEND_INFERENCE_ENGINE && preferableTarget != DNN_TARGET_MYRIAD;90}9192bool getMemoryShapes(const std::vector<MatShape> &inputs,93const int requiredOutputs,94std::vector<MatShape> &outputs,95std::vector<MatShape> &internals) const CV_OVERRIDE96{97// We need to allocate the following blobs:98// - output priors from PriorBoxLayer99// - permuted priors100// - permuted scores101CV_Assert(inputs.size() == 3);102103const MatShape& scores = inputs[0];104const MatShape& bboxDeltas = inputs[1];105106std::vector<MatShape> layerInputs, layerOutputs, layerInternals;107108// Prior boxes layer.109layerInputs.assign(1, scores);110priorBoxLayer->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);111CV_Assert(layerOutputs.size() == 1);112CV_Assert(layerInternals.empty());113internals.push_back(layerOutputs[0]);114115// Scores permute layer.116CV_Assert(scores.size() == 4);117MatShape objectScores = scores;118CV_Assert((scores[1] & 1) == 0); // Number of channels is even.119objectScores[1] /= 2;120layerInputs.assign(1, objectScores);121scoresPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);122CV_Assert(layerOutputs.size() == 1);123CV_Assert(layerInternals.empty());124internals.push_back(layerOutputs[0]);125126// BBox predictions permute layer.127layerInputs.assign(1, bboxDeltas);128deltasPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);129CV_Assert(layerOutputs.size() == 1);130CV_Assert(layerInternals.empty());131internals.push_back(layerOutputs[0]);132133outputs.resize(2);134outputs[0] = shape(keepTopAfterNMS, 5);135outputs[1] = shape(keepTopAfterNMS, 1);136return false;137}138139void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE140{141std::vector<Mat> inputs;142inputs_arr.getMatVector(inputs);143144std::vector<Mat> layerInputs;145std::vector<Mat> layerOutputs;146147// Scores permute layer.148Mat scores = getObjectScores(inputs[0]);149layerInputs.assign(1, scores);150layerOutputs.assign(1, Mat(shape(scores.size[0], scores.size[2],151scores.size[3], scores.size[1]), CV_32FC1));152scoresPermute->finalize(layerInputs, layerOutputs);153154// BBox predictions permute layer.155const Mat& bboxDeltas = inputs[1];156CV_Assert(bboxDeltas.dims == 4);157layerInputs.assign(1, bboxDeltas);158layerOutputs.assign(1, Mat(shape(bboxDeltas.size[0], bboxDeltas.size[2],159bboxDeltas.size[3], bboxDeltas.size[1]), CV_32FC1));160deltasPermute->finalize(layerInputs, layerOutputs);161}162163#ifdef HAVE_OPENCL164bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)165{166std::vector<UMat> inputs;167std::vector<UMat> outputs;168std::vector<UMat> internals;169170if (inputs_.depth() == CV_16S)171return false;172173inputs_.getUMatVector(inputs);174outputs_.getUMatVector(outputs);175internals_.getUMatVector(internals);176177CV_Assert(inputs.size() == 3);178CV_Assert(internals.size() == 3);179const UMat& scores = inputs[0];180const UMat& bboxDeltas = inputs[1];181const UMat& imInfo = inputs[2];182UMat& priorBoxes = internals[0];183UMat& permuttedScores = internals[1];184UMat& permuttedDeltas = internals[2];185186CV_Assert(imInfo.total() >= 2);187// We've chosen the smallest data type because we need just a shape from it.188Mat szMat;189imInfo.copyTo(szMat);190int rows = (int)szMat.at<float>(0);191int cols = (int)szMat.at<float>(1);192umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);193umat_fakeImageBlob.setTo(0);194195// Generate prior boxes.196std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);197layerInputs[0] = scores;198layerInputs[1] = umat_fakeImageBlob;199priorBoxLayer->forward(layerInputs, layerOutputs, internals);200201// Permute scores.202layerInputs.assign(1, getObjectScores(scores));203layerOutputs.assign(1, permuttedScores);204scoresPermute->forward(layerInputs, layerOutputs, internals);205206// Permute deltas.207layerInputs.assign(1, bboxDeltas);208layerOutputs.assign(1, permuttedDeltas);209deltasPermute->forward(layerInputs, layerOutputs, internals);210211// Sort predictions by scores and apply NMS. DetectionOutputLayer allocates212// output internally because of different number of objects after NMS.213layerInputs.resize(4);214layerInputs[0] = permuttedDeltas;215layerInputs[1] = permuttedScores;216layerInputs[2] = priorBoxes;217layerInputs[3] = umat_fakeImageBlob;218219layerOutputs[0] = UMat();220detectionOutputLayer->forward(layerInputs, layerOutputs, internals);221222// DetectionOutputLayer produces 1x1xNx7 output where N might be less or223// equal to keepTopAfterNMS. We fill the rest by zeros.224const int numDets = layerOutputs[0].total() / 7;225CV_Assert(numDets <= keepTopAfterNMS);226227MatShape s = shape(numDets, 7);228layerOutputs[0] = layerOutputs[0].reshape(1, s.size(), &s[0]);229230// The boxes.231UMat dst = outputs[0].rowRange(0, numDets);232layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));233dst.col(0).setTo(0); // First column are batch ids. Keep it zeros too.234235// The scores.236dst = outputs[1].rowRange(0, numDets);237layerOutputs[0].col(2).copyTo(dst);238239if (numDets < keepTopAfterNMS)240for (int i = 0; i < 2; ++i)241outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);242243return true;244}245#endif246247void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE248{249CV_TRACE_FUNCTION();250CV_TRACE_ARG_VALUE(name, "name", name.c_str());251252CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&253OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),254forward_ocl(inputs_arr, outputs_arr, internals_arr))255256if (inputs_arr.depth() == CV_16S)257{258forward_fallback(inputs_arr, outputs_arr, internals_arr);259return;260}261262std::vector<Mat> inputs, outputs, internals;263inputs_arr.getMatVector(inputs);264outputs_arr.getMatVector(outputs);265internals_arr.getMatVector(internals);266267CV_Assert(inputs.size() == 3);268CV_Assert(internals.size() == 3);269const Mat& scores = inputs[0];270const Mat& bboxDeltas = inputs[1];271const Mat& imInfo = inputs[2];272Mat& priorBoxes = internals[0];273Mat& permuttedScores = internals[1];274Mat& permuttedDeltas = internals[2];275276CV_Assert(imInfo.total() >= 2);277// We've chosen the smallest data type because we need just a shape from it.278fakeImageBlob.create(shape(1, 1, imInfo.at<float>(0), imInfo.at<float>(1)), CV_8UC1);279280// Generate prior boxes.281std::vector<Mat> layerInputs(2), layerOutputs(1, priorBoxes);282layerInputs[0] = scores;283layerInputs[1] = fakeImageBlob;284priorBoxLayer->forward(layerInputs, layerOutputs, internals);285286// Permute scores.287layerInputs.assign(1, getObjectScores(scores));288layerOutputs.assign(1, permuttedScores);289scoresPermute->forward(layerInputs, layerOutputs, internals);290291// Permute deltas.292layerInputs.assign(1, bboxDeltas);293layerOutputs.assign(1, permuttedDeltas);294deltasPermute->forward(layerInputs, layerOutputs, internals);295296// Sort predictions by scores and apply NMS. DetectionOutputLayer allocates297// output internally because of different number of objects after NMS.298layerInputs.resize(4);299layerInputs[0] = permuttedDeltas;300layerInputs[1] = permuttedScores;301layerInputs[2] = priorBoxes;302layerInputs[3] = fakeImageBlob;303304layerOutputs[0] = Mat();305detectionOutputLayer->forward(layerInputs, layerOutputs, internals);306307// DetectionOutputLayer produces 1x1xNx7 output where N might be less or308// equal to keepTopAfterNMS. We fill the rest by zeros.309const int numDets = layerOutputs[0].total() / 7;310CV_Assert(numDets <= keepTopAfterNMS);311312// The boxes.313layerOutputs[0] = layerOutputs[0].reshape(1, numDets);314Mat dst = outputs[0].rowRange(0, numDets);315layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));316dst.col(0).setTo(0); // First column are batch ids. Keep it zeros too.317318// The scores.319dst = outputs[1].rowRange(0, numDets);320layerOutputs[0].col(2).copyTo(dst);321322if (numDets < keepTopAfterNMS)323for (int i = 0; i < 2; ++i)324outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);325}326327virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE328{329#ifdef HAVE_INF_ENGINE330InferenceEngine::LayerParams lp;331lp.name = name;332lp.type = "Proposal";333lp.precision = InferenceEngine::Precision::FP32;334std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));335336ieLayer->params["base_size"] = format("%d", baseSize);337ieLayer->params["feat_stride"] = format("%d", featStride);338ieLayer->params["min_size"] = "16";339ieLayer->params["nms_thresh"] = format("%f", nmsThreshold);340ieLayer->params["post_nms_topn"] = format("%d", keepTopAfterNMS);341ieLayer->params["pre_nms_topn"] = format("%d", keepTopBeforeNMS);342if (ratios.size())343{344ieLayer->params["ratio"] = format("%f", ratios.get<float>(0));345for (int i = 1; i < ratios.size(); ++i)346ieLayer->params["ratio"] += format(",%f", ratios.get<float>(i));347}348if (scales.size())349{350ieLayer->params["scale"] = format("%f", scales.get<float>(0));351for (int i = 1; i < scales.size(); ++i)352ieLayer->params["scale"] += format(",%f", scales.get<float>(i));353}354return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));355#endif // HAVE_INF_ENGINE356return Ptr<BackendNode>();357}358359private:360// A first half of channels are background scores. We need only a second one.361static Mat getObjectScores(const Mat& m)362{363CV_Assert(m.dims == 4);364CV_Assert(m.size[0] == 1);365int channels = m.size[1];366CV_Assert((channels & 1) == 0);367return slice(m, Range::all(), Range(channels / 2, channels));368}369370#ifdef HAVE_OPENCL371static UMat getObjectScores(const UMat& m)372{373CV_Assert(m.dims == 4);374CV_Assert(m.size[0] == 1);375int channels = m.size[1];376CV_Assert((channels & 1) == 0);377378Range r = Range(channels / 2, channels);379Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };380return m(&ranges[0]);381}382#endif383384Ptr<PriorBoxLayer> priorBoxLayer;385Ptr<DetectionOutputLayer> detectionOutputLayer;386387Ptr<PermuteLayer> deltasPermute;388Ptr<PermuteLayer> scoresPermute;389uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;390Mat fakeImageBlob;391float nmsThreshold;392DictValue ratios, scales;393#ifdef HAVE_OPENCL394UMat umat_fakeImageBlob;395#endif396};397398399Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)400{401return Ptr<ProposalLayer>(new ProposalLayerImpl(params));402}403404} // namespace dnn405} // namespace cv406407408