CoCalc -- proposal

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/layers/proposal_layer.cpp
16337 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
4

5
// Copyright (C) 2017, Intel Corporation, all rights reserved.
6
// Third party copyrights are property of their respective owners.
7
#include "../precomp.hpp"
8
#include "layers_common.hpp"
9
#include "../op_inf_engine.hpp"
10

11
namespace cv { namespace dnn {
12

13
class ProposalLayerImpl CV_FINAL : public ProposalLayer
14
{
15
public:
16
    ProposalLayerImpl(const LayerParams& params)
17
    {
18
        setParamsFrom(params);
19

20
        featStride = params.get<uint32_t>("feat_stride", 16);
21
        baseSize = params.get<uint32_t>("base_size", 16);
22
        // uint32_t minSize = params.get<uint32_t>("min_size", 16);
23
        keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
24
        keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
25
        nmsThreshold = params.get<float>("nms_thresh", 0.7);
26
        ratios = params.get("ratio");
27
        scales = params.get("scale");
28

29
        {
30
            LayerParams lp;
31
            lp.set("step", featStride);
32
            lp.set("flip", false);
33
            lp.set("clip", false);
34
            lp.set("normalized_bbox", false);
35
            lp.set("offset", 0.5 * baseSize / featStride);
36

37
            // Unused values.
38
            float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
39
            lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
40

41
            // Compute widths and heights explicitly.
42
            std::vector<float> widths, heights;
43
            widths.reserve(ratios.size() * scales.size());
44
            heights.reserve(ratios.size() * scales.size());
45
            for (int i = 0; i < ratios.size(); ++i)
46
            {
47
                float ratio = ratios.get<float>(i);
48
                for (int j = 0; j < scales.size(); ++j)
49
                {
50
                    float scale = scales.get<float>(j);
51
                    float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
52
                    float height = std::floor(width * ratio + 0.5f);
53
                    widths.push_back(scale * width);
54
                    heights.push_back(scale * height);
55
                }
56
            }
57
            lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));
58
            lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));
59

60
            priorBoxLayer = PriorBoxLayer::create(lp);
61
        }
62
        {
63
            int order[] = {0, 2, 3, 1};
64
            LayerParams lp;
65
            lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));
66

67
            deltasPermute = PermuteLayer::create(lp);
68
            scoresPermute = PermuteLayer::create(lp);
69
        }
70
        {
71
            LayerParams lp;
72
            lp.set("code_type", "CENTER_SIZE");
73
            lp.set("num_classes", 1);
74
            lp.set("share_location", true);
75
            lp.set("background_label_id", 1);  // We won't pass background scores so set it out of range [0, num_classes)
76
            lp.set("variance_encoded_in_target", true);
77
            lp.set("keep_top_k", keepTopAfterNMS);
78
            lp.set("top_k", keepTopBeforeNMS);
79
            lp.set("nms_threshold", nmsThreshold);
80
            lp.set("normalized_bbox", false);
81
            lp.set("clip", true);
82

83
            detectionOutputLayer = DetectionOutputLayer::create(lp);
84
        }
85
    }
86

87
    // The layer runs on the default OpenCV backend, and on the Inference
    // Engine backend for every target except Myriad.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        if (backendId == DNN_BACKEND_OPENCV)
            return true;
        if (backendId != DNN_BACKEND_INFERENCE_ENGINE)
            return false;
        return preferableTarget != DNN_TARGET_MYRIAD;
    }
92

93
    // Reports the shapes of the two outputs and of the three internal blobs.
    //
    // Expects exactly three inputs; inputs[0] is the scores blob and inputs[1]
    // the bbox deltas blob. Internal blobs, in the order they are appended:
    //   0) prior boxes produced by priorBoxLayer
    //   1) permuted object scores (half of the score channels)
    //   2) permuted bbox deltas
    // Outputs are [keepTopAfterNMS x 5] and [keepTopAfterNMS x 1].
    // Always returns false.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 3);

        const MatShape& scores = inputs[0];
        const MatShape& deltasShape = inputs[1];

        std::vector<MatShape> subInputs(1, scores);
        std::vector<MatShape> layerOutputs;
        std::vector<MatShape> layerInternals;

        // Internal blob #0: prior boxes, derived from the scores shape.
        priorBoxLayer->getMemoryShapes(subInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // Internal blob #1: permuted object scores. Only half of the channels
        // are kept, so the channel count has to be even.
        CV_Assert(scores.size() == 4);
        CV_Assert((scores[1] & 1) == 0);  // Number of channels is even.
        MatShape objectScoresShape(scores);
        objectScoresShape[1] /= 2;
        subInputs[0] = objectScoresShape;
        scoresPermute->getMemoryShapes(subInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // Internal blob #2: permuted bbox deltas.
        subInputs[0] = deltasShape;
        deltasPermute->getMemoryShapes(subInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // Fixed-size outputs: boxes (5 columns) and scores (1 column).
        outputs.resize(2);
        outputs[0] = shape(keepTopAfterNMS, 5);
        outputs[1] = shape(keepTopAfterNMS, 1);
        return false;
    }
139

140
    // Finalizes the two internal permute layers with the concrete input blobs.
    // Each permute output is allocated as CV_32FC1 with its axes arranged as
    // (0, 2, 3, 1) relative to the corresponding input.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);

        // Scores permute layer: works on the object-score half of inputs[0].
        Mat objScores = getObjectScores(inputs[0]);
        MatShape permutedScoresShape = shape(objScores.size[0], objScores.size[2],
                                             objScores.size[3], objScores.size[1]);
        std::vector<Mat> permuteInputs(1, objScores);
        std::vector<Mat> permuteOutputs(1, Mat(permutedScoresShape, CV_32FC1));
        scoresPermute->finalize(permuteInputs, permuteOutputs);

        // BBox predictions permute layer: works on inputs[1] as a whole.
        const Mat& bboxDeltas = inputs[1];
        CV_Assert(bboxDeltas.dims == 4);
        MatShape permutedDeltasShape = shape(bboxDeltas.size[0], bboxDeltas.size[2],
                                             bboxDeltas.size[3], bboxDeltas.size[1]);
        permuteInputs.assign(1, bboxDeltas);
        permuteOutputs.assign(1, Mat(permutedDeltasShape, CV_32FC1));
        deltasPermute->finalize(permuteInputs, permuteOutputs);
    }
163

164
#ifdef HAVE_OPENCL
165
    // OpenCL (UMat) forward pass.
    //
    // Inputs: [0] scores, [1] bbox deltas, [2] image info whose first two
    // values are used as rows and cols. Internals: [0] prior boxes,
    // [1] permuted scores, [2] permuted deltas.
    // Returns false without doing any work for CV_16S inputs; otherwise fills
    // outputs[0] (boxes) and outputs[1] (scores) and returns true.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        if (inputs_.depth() == CV_16S)
            return false;

        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;
        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        CV_Assert(inputs.size() == 3);
        CV_Assert(internals.size() == 3);
        const UMat& scoresBlob = inputs[0];
        const UMat& deltasBlob = inputs[1];
        const UMat& imInfo = inputs[2];
        UMat& priors = internals[0];
        UMat& scoresPermuted = internals[1];
        UMat& deltasPermuted = internals[2];

        CV_Assert(imInfo.total() >= 2);
        // Only the shape of this blob matters, hence the smallest data type.
        // The image size is read on the host from a copy of imInfo.
        Mat imInfoHost;
        imInfo.copyTo(imInfoHost);
        const int imgRows = (int)imInfoHost.at<float>(0);
        const int imgCols = (int)imInfoHost.at<float>(1);
        umat_fakeImageBlob.create(shape(1, 1, imgRows, imgCols), CV_8UC1);
        umat_fakeImageBlob.setTo(0);

        // Step 1: generate prior boxes from the scores blob and the fake image.
        std::vector<UMat> subInputs(2);
        std::vector<UMat> layerOutputs(1, priors);
        subInputs[0] = scoresBlob;
        subInputs[1] = umat_fakeImageBlob;
        priorBoxLayer->forward(subInputs, layerOutputs, internals);

        // Step 2: permute the object scores.
        subInputs.assign(1, getObjectScores(scoresBlob));
        layerOutputs.assign(1, scoresPermuted);
        scoresPermute->forward(subInputs, layerOutputs, internals);

        // Step 3: permute the bbox deltas.
        subInputs.assign(1, deltasBlob);
        layerOutputs.assign(1, deltasPermuted);
        deltasPermute->forward(subInputs, layerOutputs, internals);

        // Step 4: sort predictions by scores and apply NMS. The output is
        // passed in empty because DetectionOutputLayer allocates it itself:
        // the number of objects after NMS is not known in advance.
        subInputs.resize(4);
        subInputs[0] = deltasPermuted;
        subInputs[1] = scoresPermuted;
        subInputs[2] = priors;
        subInputs[3] = umat_fakeImageBlob;

        layerOutputs[0] = UMat();
        detectionOutputLayer->forward(subInputs, layerOutputs, internals);

        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
        // equal to keepTopAfterNMS. The remaining rows are zero-filled below.
        const int numDets = layerOutputs[0].total() / 7;
        CV_Assert(numDets <= keepTopAfterNMS);

        // View the detections as an N x 7 matrix.
        MatShape detShape = shape(numDets, 7);
        UMat detections = layerOutputs[0].reshape(1, detShape.size(), &detShape[0]);

        // Boxes: detection columns 3..6 go to output columns 1..4.
        UMat boxesDst = outputs[0].rowRange(0, numDets);
        detections.colRange(3, 7).copyTo(boxesDst.colRange(1, 5));
        boxesDst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.

        // Scores: detection column 2.
        UMat scoresDst = outputs[1].rowRange(0, numDets);
        detections.col(2).copyTo(scoresDst);

        // Zero the unused tail of both outputs.
        if (numDets < keepTopAfterNMS)
        {
            outputs[0].rowRange(numDets, keepTopAfterNMS).setTo(0);
            outputs[1].rowRange(numDets, keepTopAfterNMS).setTo(0);
        }

        return true;
    }
246
#endif
247

248
    // CPU forward pass.
    //
    // The OpenCL path is offered first through CV_OCL_RUN, and CV_16S inputs
    // are handed to forward_fallback. Otherwise: generate prior boxes, permute
    // scores and deltas, run DetectionOutputLayer, then copy boxes into
    // outputs[0] and scores into outputs[1], zero-filling unused rows.
    //
    // Inputs: [0] scores, [1] bbox deltas, [2] image info whose first two
    // values are used as the fake image's last two dimensions.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs;
        std::vector<Mat> outputs;
        std::vector<Mat> internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() == 3);
        CV_Assert(internals.size() == 3);
        const Mat& scoresBlob = inputs[0];
        const Mat& deltasBlob = inputs[1];
        const Mat& imInfo = inputs[2];
        Mat& priors = internals[0];
        Mat& scoresPermuted = internals[1];
        Mat& deltasPermuted = internals[2];

        CV_Assert(imInfo.total() >= 2);
        // Only the shape of this blob matters, hence the smallest data type.
        const int imgRows = static_cast<int>(imInfo.at<float>(0));
        const int imgCols = static_cast<int>(imInfo.at<float>(1));
        fakeImageBlob.create(shape(1, 1, imgRows, imgCols), CV_8UC1);

        // Step 1: generate prior boxes from the scores blob and the fake image.
        std::vector<Mat> subInputs(2);
        std::vector<Mat> layerOutputs(1, priors);
        subInputs[0] = scoresBlob;
        subInputs[1] = fakeImageBlob;
        priorBoxLayer->forward(subInputs, layerOutputs, internals);

        // Step 2: permute the object scores.
        subInputs.assign(1, getObjectScores(scoresBlob));
        layerOutputs.assign(1, scoresPermuted);
        scoresPermute->forward(subInputs, layerOutputs, internals);

        // Step 3: permute the bbox deltas.
        subInputs.assign(1, deltasBlob);
        layerOutputs.assign(1, deltasPermuted);
        deltasPermute->forward(subInputs, layerOutputs, internals);

        // Step 4: sort predictions by scores and apply NMS. The output is
        // passed in empty because DetectionOutputLayer allocates it itself:
        // the number of objects after NMS is not known in advance.
        subInputs.resize(4);
        subInputs[0] = deltasPermuted;
        subInputs[1] = scoresPermuted;
        subInputs[2] = priors;
        subInputs[3] = fakeImageBlob;

        layerOutputs[0] = Mat();
        detectionOutputLayer->forward(subInputs, layerOutputs, internals);

        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
        // equal to keepTopAfterNMS. The remaining rows are zero-filled below.
        const int numDets = layerOutputs[0].total() / 7;
        CV_Assert(numDets <= keepTopAfterNMS);

        // View the detections as a single-channel matrix with numDets rows.
        Mat detections = layerOutputs[0].reshape(1, numDets);

        // Boxes: detection columns 3..6 go to output columns 1..4.
        Mat boxesDst = outputs[0].rowRange(0, numDets);
        detections.colRange(3, 7).copyTo(boxesDst.colRange(1, 5));
        boxesDst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.

        // Scores: detection column 2.
        Mat scoresDst = outputs[1].rowRange(0, numDets);
        detections.col(2).copyTo(scoresDst);

        // Zero the unused tail of both outputs.
        if (numDets < keepTopAfterNMS)
        {
            outputs[0].rowRange(numDets, keepTopAfterNMS).setTo(0);
            outputs[1].rowRange(numDets, keepTopAfterNMS).setTo(0);
        }
    }
327

328
    // Builds the Inference Engine "Proposal" layer carrying this layer's
    // parameters as strings. "ratio" and "scale" are written as
    // comma-separated lists and are left unset when the list is empty.
    // When HAVE_INF_ENGINE is not defined an empty node is returned.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::LayerParams ieParams;
        ieParams.name = name;
        ieParams.type = "Proposal";
        ieParams.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(ieParams));

        ieLayer->params["base_size"] = format("%d", baseSize);
        ieLayer->params["feat_stride"] = format("%d", featStride);
        ieLayer->params["min_size"] = "16";
        ieLayer->params["nms_thresh"] = format("%f", nmsThreshold);
        ieLayer->params["post_nms_topn"] = format("%d", keepTopAfterNMS);
        ieLayer->params["pre_nms_topn"] = format("%d", keepTopBeforeNMS);

        // First element starts the list, every following one is appended
        // with a leading comma.
        for (int i = 0; i < ratios.size(); ++i)
        {
            if (i == 0)
                ieLayer->params["ratio"] = format("%f", ratios.get<float>(i));
            else
                ieLayer->params["ratio"] += format(",%f", ratios.get<float>(i));
        }
        for (int i = 0; i < scales.size(); ++i)
        {
            if (i == 0)
                ieLayer->params["scale"] = format("%f", scales.get<float>(i));
            else
                ieLayer->params["scale"] += format(",%f", scales.get<float>(i));
        }
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }
359

360
private:
361
    // A first half of channels are background scores. We need only a second one.
362
    // Returns the second half of the channels of a 4-D, batch-size-1 blob.
    // The first half holds background scores, which are not needed.
    static Mat getObjectScores(const Mat& m)
    {
        CV_Assert(m.dims == 4);
        CV_Assert(m.size[0] == 1);
        int channels = m.size[1];
        CV_Assert((channels & 1) == 0);

        const Range objectChannels(channels / 2, channels);
        return slice(m, Range::all(), objectChannels);
    }
370

371
#ifdef HAVE_OPENCL
372
    // UMat counterpart: returns the second half of the channels of a 4-D,
    // batch-size-1 blob, keeping the other three axes whole.
    static UMat getObjectScores(const UMat& m)
    {
        CV_Assert(m.dims == 4);
        CV_Assert(m.size[0] == 1);
        int channels = m.size[1];
        CV_Assert((channels & 1) == 0);

        // Start from "everything" on all four axes, then narrow the channel axis.
        std::vector<Range> ranges(4, Range::all());
        ranges[1] = Range(channels / 2, channels);
        return m(&ranges[0]);
    }
383
#endif
384

385
    // Helper layer that generates the prior boxes; created in the constructor
    // with explicit widths and heights.
    Ptr<PriorBoxLayer> priorBoxLayer;
386
    // Helper layer that sorts predictions by score and applies NMS.
    Ptr<DetectionOutputLayer> detectionOutputLayer;
387

388
    // Permute layers (axis order {0, 2, 3, 1}) for bbox deltas and object scores.
    Ptr<PermuteLayer> deltasPermute;
389
    Ptr<PermuteLayer> scoresPermute;
390
    // Values of "pre_nms_topn", "post_nms_topn", "feat_stride" and "base_size".
    uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
391
    // 8-bit blob sized from the image info input; only its shape is used (CPU path).
    Mat fakeImageBlob;
392
    // Value of "nms_thresh".
    float nmsThreshold;
393
    // Values of "ratio" and "scale" used to derive anchor widths and heights.
    DictValue ratios, scales;
394
#ifdef HAVE_OPENCL
395
    // UMat counterpart of fakeImageBlob, used by forward_ocl.
    UMat umat_fakeImageBlob;
396
#endif
397
};
398

399

400
// Factory: constructs the concrete proposal layer from the given parameters.
Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)
{
    Ptr<ProposalLayer> layer(new ProposalLayerImpl(params));
    return layer;
}
404

405
}  // namespace dnn
406
}  // namespace cv
407

408
Product

Resources

Company