Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/layers/proposal_layer.cpp
16337 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
4
5
// Copyright (C) 2017, Intel Corporation, all rights reserved.
6
// Third party copyrights are property of their respective owners.
7
#include "../precomp.hpp"
8
#include "layers_common.hpp"
9
#include "../op_inf_engine.hpp"
10
11
namespace cv { namespace dnn {
12
13
// Proposal layer implemented as a composition of four helper layers:
//   - a PriorBoxLayer that generates anchor boxes from explicit widths/heights,
//   - two PermuteLayers (axis order 0, 2, 3, 1) for the scores and the box deltas,
//   - a DetectionOutputLayer that decodes the boxes, sorts them by score and applies NMS.
//
// Inputs (exactly three): [0] scores, [1] bounding box deltas, [2] image info
// whose first two float values are used as the rows and cols of a fake image blob.
// Outputs (two): [0] keepTopAfterNMS x 5 boxes (column 0 is a zero batch id),
//                [1] keepTopAfterNMS x 1 scores. Unused trailing rows are zero-filled.
class ProposalLayerImpl CV_FINAL : public ProposalLayer
{
public:
    // Reads the layer parameters and builds the four helper layers.
    // "ratio" and "scale" are mandatory and must both be non-empty;
    // the remaining parameters fall back to the defaults given below.
    ProposalLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);

        featStride = params.get<uint32_t>("feat_stride", 16);
        baseSize = params.get<uint32_t>("base_size", 16);
        // "min_size" is not read here; initInfEngine() forwards a fixed value of "16".
        // uint32_t minSize = params.get<uint32_t>("min_size", 16);
        keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
        keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
        nmsThreshold = params.get<float>("nms_thresh", 0.7);
        ratios = params.get("ratio");
        scales = params.get("scale");

        // Without at least one ratio and one scale there are no anchors, and taking
        // the address of the first element of an empty vector below would be
        // undefined behaviour. Fail early with a clear assertion instead.
        CV_Assert(ratios.size() > 0 && scales.size() > 0);

        {
            LayerParams lp;
            lp.set("step", featStride);
            lp.set("flip", false);
            lp.set("clip", false);
            lp.set("normalized_bbox", false);
            lp.set("offset", 0.5 * baseSize / featStride);

            // Unused values.
            float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
            lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));

            // Compute widths and heights explicitly: one anchor per (ratio, scale) pair,
            // ratios in the outer loop so that all scales of a ratio are adjacent.
            std::vector<float> widths, heights;
            widths.reserve(ratios.size() * scales.size());
            heights.reserve(ratios.size() * scales.size());
            for (int i = 0; i < ratios.size(); ++i)
            {
                const float ratio = ratios.get<float>(i);
                // The rounded base box depends on the ratio only, so it is computed
                // once per ratio and then multiplied by every scale.
                const float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
                const float height = std::floor(width * ratio + 0.5f);
                for (int j = 0; j < scales.size(); ++j)
                {
                    const float scale = scales.get<float>(j);
                    widths.push_back(scale * width);
                    heights.push_back(scale * height);
                }
            }
            lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));
            lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));

            priorBoxLayer = PriorBoxLayer::create(lp);
        }
        {
            // Both permute layers share the same axis order.
            int order[] = {0, 2, 3, 1};
            LayerParams lp;
            lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));

            deltasPermute = PermuteLayer::create(lp);
            scoresPermute = PermuteLayer::create(lp);
        }
        {
            LayerParams lp;
            lp.set("code_type", "CENTER_SIZE");
            lp.set("num_classes", 1);
            lp.set("share_location", true);
            lp.set("background_label_id", 1);  // We won't pass background scores so set it out of range [0, num_classes)
            lp.set("variance_encoded_in_target", true);
            lp.set("keep_top_k", keepTopAfterNMS);
            lp.set("top_k", keepTopBeforeNMS);
            lp.set("nms_threshold", nmsThreshold);
            lp.set("normalized_bbox", false);
            lp.set("clip", true);

            detectionOutputLayer = DetectionOutputLayer::create(lp);
        }
    }

    // Supported on the OpenCV backend, and on the Inference Engine backend
    // for every target except Myriad.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE && preferableTarget != DNN_TARGET_MYRIAD);
    }

    // Computes output and internal blob shapes from the three input shapes.
    // Internals, in order: prior boxes, permuted scores, permuted box deltas.
    // Outputs: (keepTopAfterNMS, 5) boxes and (keepTopAfterNMS, 1) scores.
    // Always returns false.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // We need to allocate the following blobs:
        // - output priors from PriorBoxLayer
        // - permuted priors
        // - permuted scores
        CV_Assert(inputs.size() == 3);

        const MatShape& scores = inputs[0];
        const MatShape& bboxDeltas = inputs[1];

        std::vector<MatShape> layerInputs, layerOutputs, layerInternals;

        // Prior boxes layer.
        layerInputs.assign(1, scores);
        priorBoxLayer->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // Scores permute layer. Only half of the score channels are permuted
        // (see getObjectScores), so the channel count is halved first.
        CV_Assert(scores.size() == 4);
        MatShape objectScores = scores;
        CV_Assert((scores[1] & 1) == 0);  // Number of channels is even.
        objectScores[1] /= 2;
        layerInputs.assign(1, objectScores);
        scoresPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // BBox predictions permute layer.
        layerInputs.assign(1, bboxDeltas);
        deltasPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        outputs.resize(2);
        outputs[0] = shape(keepTopAfterNMS, 5);
        outputs[1] = shape(keepTopAfterNMS, 1);
        return false;
    }

    // Finalizes both permute layers with temporary output blobs whose shapes
    // are the inputs' shapes reordered as (0, 2, 3, 1).
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);

        std::vector<Mat> layerInputs;
        std::vector<Mat> layerOutputs;

        // Scores permute layer.
        Mat scores = getObjectScores(inputs[0]);
        layerInputs.assign(1, scores);
        layerOutputs.assign(1, Mat(shape(scores.size[0], scores.size[2],
                                         scores.size[3], scores.size[1]), CV_32FC1));
        scoresPermute->finalize(layerInputs, layerOutputs);

        // BBox predictions permute layer.
        const Mat& bboxDeltas = inputs[1];
        CV_Assert(bboxDeltas.dims == 4);
        layerInputs.assign(1, bboxDeltas);
        layerOutputs.assign(1, Mat(shape(bboxDeltas.size[0], bboxDeltas.size[2],
                                         bboxDeltas.size[3], bboxDeltas.size[1]), CV_32FC1));
        deltasPermute->finalize(layerInputs, layerOutputs);
    }

#ifdef HAVE_OPENCL
    // UMat counterpart of forward(). Returns false without doing any work when
    // the inputs have CV_16S depth, true once the outputs have been written.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        if (inputs_.depth() == CV_16S)
            return false;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        CV_Assert(inputs.size() == 3);
        CV_Assert(internals.size() == 3);
        const UMat& scores = inputs[0];
        const UMat& bboxDeltas = inputs[1];
        const UMat& imInfo = inputs[2];
        UMat& priorBoxes = internals[0];
        UMat& permuttedScores = internals[1];
        UMat& permuttedDeltas = internals[2];

        CV_Assert(imInfo.total() >= 2);
        // We've chosen the smallest data type because we need just a shape from it.
        Mat szMat;
        imInfo.copyTo(szMat);
        int rows = (int)szMat.at<float>(0);
        int cols = (int)szMat.at<float>(1);
        umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
        umat_fakeImageBlob.setTo(0);

        // Generate prior boxes.
        std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
        layerInputs[0] = scores;
        layerInputs[1] = umat_fakeImageBlob;
        priorBoxLayer->forward(layerInputs, layerOutputs, internals);

        // Permute scores.
        layerInputs.assign(1, getObjectScores(scores));
        layerOutputs.assign(1, permuttedScores);
        scoresPermute->forward(layerInputs, layerOutputs, internals);

        // Permute deltas.
        layerInputs.assign(1, bboxDeltas);
        layerOutputs.assign(1, permuttedDeltas);
        deltasPermute->forward(layerInputs, layerOutputs, internals);

        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
        // output internally because of different number of objects after NMS.
        layerInputs.resize(4);
        layerInputs[0] = permuttedDeltas;
        layerInputs[1] = permuttedScores;
        layerInputs[2] = priorBoxes;
        layerInputs[3] = umat_fakeImageBlob;

        layerOutputs[0] = UMat();
        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
        // equal to keepTopAfterNMS. We fill the rest by zeros.
        const int numDets = static_cast<int>(layerOutputs[0].total() / 7);
        const int maxDets = static_cast<int>(keepTopAfterNMS);
        CV_Assert(numDets <= maxDets);

        MatShape s = shape(numDets, 7);
        layerOutputs[0] = layerOutputs[0].reshape(1, s.size(), &s[0]);

        // The boxes: detection columns 3..6 go to output columns 1..4.
        UMat dst = outputs[0].rowRange(0, numDets);
        layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.

        // The scores: detection column 2.
        dst = outputs[1].rowRange(0, numDets);
        layerOutputs[0].col(2).copyTo(dst);

        if (numDets < maxDets)
            for (int i = 0; i < 2; ++i)
                outputs[i].rowRange(numDets, maxDets).setTo(0);

        return true;
    }
#endif

    // Runs the layer: prior boxes -> permute scores -> permute deltas ->
    // detection output, then copies boxes and scores into the two output blobs.
    // Tries forward_ocl() first through CV_OCL_RUN; inputs of CV_16S depth are
    // delegated to forward_fallback().
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() == 3);
        CV_Assert(internals.size() == 3);
        const Mat& scores = inputs[0];
        const Mat& bboxDeltas = inputs[1];
        const Mat& imInfo = inputs[2];
        Mat& priorBoxes = internals[0];
        Mat& permuttedScores = internals[1];
        Mat& permuttedDeltas = internals[2];

        CV_Assert(imInfo.total() >= 2);
        // We've chosen the smallest data type because we need just a shape from it.
        // The float -> int truncation is spelled out, as in forward_ocl().
        const int rows = static_cast<int>(imInfo.at<float>(0));
        const int cols = static_cast<int>(imInfo.at<float>(1));
        fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);

        // Generate prior boxes.
        std::vector<Mat> layerInputs(2), layerOutputs(1, priorBoxes);
        layerInputs[0] = scores;
        layerInputs[1] = fakeImageBlob;
        priorBoxLayer->forward(layerInputs, layerOutputs, internals);

        // Permute scores.
        layerInputs.assign(1, getObjectScores(scores));
        layerOutputs.assign(1, permuttedScores);
        scoresPermute->forward(layerInputs, layerOutputs, internals);

        // Permute deltas.
        layerInputs.assign(1, bboxDeltas);
        layerOutputs.assign(1, permuttedDeltas);
        deltasPermute->forward(layerInputs, layerOutputs, internals);

        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
        // output internally because of different number of objects after NMS.
        layerInputs.resize(4);
        layerInputs[0] = permuttedDeltas;
        layerInputs[1] = permuttedScores;
        layerInputs[2] = priorBoxes;
        layerInputs[3] = fakeImageBlob;

        layerOutputs[0] = Mat();
        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
        // equal to keepTopAfterNMS. We fill the rest by zeros.
        const int numDets = static_cast<int>(layerOutputs[0].total() / 7);
        const int maxDets = static_cast<int>(keepTopAfterNMS);
        CV_Assert(numDets <= maxDets);

        // The boxes: detection columns 3..6 go to output columns 1..4.
        layerOutputs[0] = layerOutputs[0].reshape(1, numDets);
        Mat dst = outputs[0].rowRange(0, numDets);
        layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.

        // The scores: detection column 2.
        dst = outputs[1].rowRange(0, numDets);
        layerOutputs[0].col(2).copyTo(dst);

        if (numDets < maxDets)
            for (int i = 0; i < 2; ++i)
                outputs[i].rowRange(numDets, maxDets).setTo(0);
    }

    // Builds an Inference Engine "Proposal" layer carrying this layer's parameters.
    // "ratio" and "scale" are serialized as comma-separated lists.
    // Returns an empty node when built without HAVE_INF_ENGINE.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::LayerParams lp;
        lp.name = name;
        lp.type = "Proposal";
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));

        // The members below are uint32_t, hence the unsigned conversion specifier.
        ieLayer->params["base_size"] = format("%u", baseSize);
        ieLayer->params["feat_stride"] = format("%u", featStride);
        ieLayer->params["min_size"] = "16";
        ieLayer->params["nms_thresh"] = format("%f", nmsThreshold);
        ieLayer->params["post_nms_topn"] = format("%u", keepTopAfterNMS);
        ieLayer->params["pre_nms_topn"] = format("%u", keepTopBeforeNMS);
        if (ratios.size())
        {
            ieLayer->params["ratio"] = format("%f", ratios.get<float>(0));
            for (int i = 1; i < ratios.size(); ++i)
                ieLayer->params["ratio"] += format(",%f", ratios.get<float>(i));
        }
        if (scales.size())
        {
            ieLayer->params["scale"] = format("%f", scales.get<float>(0));
            for (int i = 1; i < scales.size(); ++i)
                ieLayer->params["scale"] += format(",%f", scales.get<float>(i));
        }
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }

private:
    // A first half of channels are background scores. We need only a second one.
    // Requires a 4-dimensional blob with batch size 1 and an even channel count.
    static Mat getObjectScores(const Mat& m)
    {
        CV_Assert(m.dims == 4);
        CV_Assert(m.size[0] == 1);
        int channels = m.size[1];
        CV_Assert((channels & 1) == 0);
        return slice(m, Range::all(), Range(channels / 2, channels));
    }

#ifdef HAVE_OPENCL
    // UMat overload: same preconditions, selects the second half of the channels.
    static UMat getObjectScores(const UMat& m)
    {
        CV_Assert(m.dims == 4);
        CV_Assert(m.size[0] == 1);
        int channels = m.size[1];
        CV_Assert((channels & 1) == 0);

        Range r = Range(channels / 2, channels);
        Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
        return m(&ranges[0]);
    }
#endif

    // Helper layers created in the constructor.
    Ptr<PriorBoxLayer> priorBoxLayer;
    Ptr<DetectionOutputLayer> detectionOutputLayer;

    Ptr<PermuteLayer> deltasPermute;
    Ptr<PermuteLayer> scoresPermute;
    // Values of "pre_nms_topn", "post_nms_topn", "feat_stride" and "base_size".
    uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
    // 1x1xRxC 8-bit blob passed to the helper layers; only its shape is set in forward().
    Mat fakeImageBlob;
    float nmsThreshold;
    // Raw "ratio" and "scale" parameter arrays.
    DictValue ratios, scales;
#ifdef HAVE_OPENCL
    // UMat counterpart of fakeImageBlob used by forward_ocl().
    UMat umat_fakeImageBlob;
#endif
};
398
399
400
// Factory: constructs the concrete ProposalLayerImpl from the given parameters
// and hands it back through the public ProposalLayer interface.
Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)
{
    Ptr<ProposalLayer> layer(new ProposalLayerImpl(params));
    return layer;
}
404
405
} // namespace dnn
406
} // namespace cv
407
408