Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/layers/prior_box_layer.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Copyright (C) 2017, Intel Corporation, all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
22
//
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
26
//
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
#include "../precomp.hpp"
44
#include "layers_common.hpp"
45
#include "../op_inf_engine.hpp"
46
#include "../op_vkcom.hpp"
47
#include <float.h>
48
#include <algorithm>
49
#include <cmath>
50
51
#ifdef HAVE_OPENCL
52
#include "opencl_kernels_dnn.hpp"
53
#endif
54
55
namespace cv
56
{
57
namespace dnn
58
{
59
60
class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer
61
{
62
public:
63
static bool getParameterDict(const LayerParams &params,
64
const std::string &parameterName,
65
DictValue& result)
66
{
67
if (!params.has(parameterName))
68
{
69
return false;
70
}
71
72
result = params.get(parameterName);
73
return true;
74
}
75
76
template<typename T>
77
T getParameter(const LayerParams &params,
78
const std::string &parameterName,
79
const size_t &idx=0,
80
const bool required=true,
81
const T& defaultValue=T())
82
{
83
DictValue dictValue;
84
bool success = getParameterDict(params, parameterName, dictValue);
85
if(!success)
86
{
87
if(required)
88
{
89
std::string message = _layerName;
90
message += " layer parameter does not contain ";
91
message += parameterName;
92
message += " parameter.";
93
CV_Error(Error::StsBadArg, message);
94
}
95
else
96
{
97
return defaultValue;
98
}
99
}
100
return dictValue.get<T>(idx);
101
}
102
103
void getAspectRatios(const LayerParams &params)
104
{
105
DictValue aspectRatioParameter;
106
bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
107
if (!aspectRatioRetieved)
108
return;
109
110
for (int i = 0; i < aspectRatioParameter.size(); ++i)
111
{
112
float aspectRatio = aspectRatioParameter.get<float>(i);
113
bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;
114
115
for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
116
{
117
alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
118
}
119
if (!alreadyExists)
120
{
121
_aspectRatios.push_back(aspectRatio);
122
if (_flip)
123
{
124
_aspectRatios.push_back(1./aspectRatio);
125
}
126
}
127
}
128
}
129
130
static void getParams(const std::string& name, const LayerParams &params,
131
std::vector<float>* values)
132
{
133
DictValue dict;
134
if (getParameterDict(params, name, dict))
135
{
136
values->resize(dict.size());
137
for (int i = 0; i < dict.size(); ++i)
138
{
139
(*values)[i] = dict.get<float>(i);
140
}
141
}
142
else
143
values->clear();
144
}
145
146
void getVariance(const LayerParams &params)
147
{
148
DictValue varianceParameter;
149
bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
150
CV_Assert(varianceParameterRetrieved);
151
152
int varianceSize = varianceParameter.size();
153
if (varianceSize > 1)
154
{
155
// Must and only provide 4 variance.
156
CV_Assert(varianceSize == 4);
157
158
for (int i = 0; i < varianceSize; ++i)
159
{
160
float variance = varianceParameter.get<float>(i);
161
CV_Assert(variance > 0);
162
_variance.push_back(variance);
163
}
164
}
165
else
166
{
167
if (varianceSize == 1)
168
{
169
float variance = varianceParameter.get<float>(0);
170
CV_Assert(variance > 0);
171
_variance.push_back(variance);
172
}
173
else
174
{
175
// Set default to 0.1.
176
_variance.push_back(0.1f);
177
}
178
}
179
}
180
181
PriorBoxLayerImpl(const LayerParams &params)
182
{
183
setParamsFrom(params);
184
_minSize = getParameter<float>(params, "min_size", 0, false, 0);
185
_flip = getParameter<bool>(params, "flip", 0, false, true);
186
_clip = getParameter<bool>(params, "clip", 0, false, true);
187
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
188
189
_aspectRatios.clear();
190
191
getAspectRatios(params);
192
getVariance(params);
193
194
_maxSize = -1;
195
if (params.has("max_size"))
196
{
197
_maxSize = params.get("max_size").get<float>(0);
198
CV_Assert(_maxSize > _minSize);
199
}
200
201
std::vector<float> widths, heights;
202
getParams("width", params, &widths);
203
getParams("height", params, &heights);
204
_explicitSizes = !widths.empty();
205
CV_Assert(widths.size() == heights.size());
206
207
if (_explicitSizes)
208
{
209
CV_Assert(_aspectRatios.empty());
210
CV_Assert(!params.has("min_size"));
211
CV_Assert(!params.has("max_size"));
212
_boxWidths = widths;
213
_boxHeights = heights;
214
}
215
else
216
{
217
CV_Assert(_minSize > 0);
218
_boxWidths.resize(1 + (_maxSize > 0 ? 1 : 0) + _aspectRatios.size());
219
_boxHeights.resize(_boxWidths.size());
220
_boxWidths[0] = _boxHeights[0] = _minSize;
221
222
int i = 1;
223
if (_maxSize > 0)
224
{
225
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
226
_boxWidths[i] = _boxHeights[i] = sqrt(_minSize * _maxSize);
227
i += 1;
228
}
229
230
// rest of priors
231
for (size_t r = 0; r < _aspectRatios.size(); ++r)
232
{
233
float arSqrt = sqrt(_aspectRatios[r]);
234
_boxWidths[i + r] = _minSize * arSqrt;
235
_boxHeights[i + r] = _minSize / arSqrt;
236
}
237
}
238
CV_Assert(_boxWidths.size() == _boxHeights.size());
239
_numPriors = _boxWidths.size();
240
241
if (params.has("step_h") || params.has("step_w")) {
242
CV_Assert(!params.has("step"));
243
_stepY = getParameter<float>(params, "step_h");
244
CV_Assert(_stepY > 0.);
245
_stepX = getParameter<float>(params, "step_w");
246
CV_Assert(_stepX > 0.);
247
} else if (params.has("step")) {
248
const float step = getParameter<float>(params, "step");
249
CV_Assert(step > 0);
250
_stepY = step;
251
_stepX = step;
252
} else {
253
_stepY = 0;
254
_stepX = 0;
255
}
256
if (params.has("offset_h") || params.has("offset_w"))
257
{
258
CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
259
getParams("offset_h", params, &_offsetsY);
260
getParams("offset_w", params, &_offsetsX);
261
CV_Assert(_offsetsX.size() == _offsetsY.size());
262
_numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
263
}
264
else
265
{
266
float offset = getParameter<float>(params, "offset", 0, false, 0.5);
267
_offsetsX.assign(1, offset);
268
_offsetsY.assign(1, offset);
269
}
270
}
271
272
// Reports whether this layer can run on the given backend: always on the
// default OpenCV backend, and on Inference Engine / Vulkan only when the
// corresponding availability check succeeds.
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
    if (backendId == DNN_BACKEND_OPENCV)
        return true;
    if (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine())
        return true;
    return (backendId == DNN_BACKEND_VKCOM) && haveVulkan();
}
278
279
// Computes the output shape: 1 x 2 x (H * W * numPriors * 4), where H and W
// are taken from axes 2 and 3 of the first input shape.
// `requiredOutputs` and `internals` are not used. Always returns false.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
                     const int requiredOutputs,
                     std::vector<MatShape> &outputs,
                     std::vector<MatShape> &internals) const CV_OVERRIDE
{
    CV_Assert(!inputs.empty());
    // Axes 2 and 3 are indexed below; reject shapes that do not have them
    // instead of reading out of bounds (finalize() expects 4-D inputs too).
    CV_Assert(inputs[0].size() >= 4);

    int layerHeight = inputs[0][2];
    int layerWidth = inputs[0][3];

    // Since all images in a batch has same height and width, we only need to
    // generate one set of priors which can be shared across all images.
    size_t outNum = 1;
    // 2 channels. First channel stores the mean of each prior coordinate.
    // Second channel stores the variance of each prior coordinate.
    size_t outChannels = 2;

    outputs.resize(1, shape(outNum, outChannels,
                            layerHeight * layerWidth * _numPriors * 4));

    return false;
}
301
302
// Derives any step that was left at 0 by the constructor from the ratio of the
// second input's spatial size to the first input's spatial size (axes 2 and 3).
// Requires at least two 4-D inputs.
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
    std::vector<Mat> inputs;
    inputs_arr.getMatVector(inputs);

    CV_CheckGT(inputs.size(), (size_t)1, "");
    CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");

    const Mat& layerBlob = inputs[0];
    const Mat& imageBlob = inputs[1];

    if (_stepY == 0)
    {
        const int layerHeight = layerBlob.size[2];
        const int imageHeight = imageBlob.size[2];
        _stepY = static_cast<float>(imageHeight) / layerHeight;
    }
    if (_stepX == 0)
    {
        const int layerWidth = layerBlob.size[3];
        const int imageWidth = imageBlob.size[3];
        _stepX = static_cast<float>(imageWidth) / layerWidth;
    }
}
318
319
#ifdef HAVE_OPENCL
320
// OpenCL implementation of the forward pass.
// Runs three kernels from prior_box_oclsrc on outputs[0]: "prior_box" to
// generate the boxes, "clip" (only when _clip is set) and "set_variance".
// Returns false as soon as any kernel fails to run, true otherwise.
// NOTE(review): presumably the CV_OCL_RUN caller falls back to the CPU path on
// false - confirm against the macro definition.
// `internals` is not used.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    bool use_half = (inps.depth() == CV_16S);
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);

    int _layerWidth = inputs[0].size[3];
    int _layerHeight = inputs[0].size[2];

    int _imageWidth = inputs[1].size[3];
    int _imageHeight = inputs[1].size[2];

    // Upload the constant tables once; they are reused by later calls.
    if (umat_offsetsX.empty())
    {
        Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
        Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
        Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
        Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
        Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);

        offsetsX.copyTo(umat_offsetsX);
        offsetsY.copyTo(umat_offsetsY);
        variance.copyTo(umat_variance);
        widths.copyTo(umat_widths);
        heights.copyTo(umat_heights);
    }

    String opts;
    if (use_half)
        opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
    else
        opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";

    // generate the prior boxes: one work item per feature-map cell.
    {
        size_t nthreads = _layerHeight * _layerWidth;
        ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);

        kernel.set(0, (int)nthreads);
        kernel.set(1, (float)_stepX);
        kernel.set(2, (float)_stepY);
        kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
        kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
        kernel.set(5, (int)_offsetsX.size());
        kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
        kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
        kernel.set(8, (int)_boxWidths.size());
        kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
        kernel.set(10, (int)_layerHeight);
        kernel.set(11, (int)_layerWidth);
        kernel.set(12, (int)_imageHeight);
        kernel.set(13, (int)_imageWidth);
        // A failed launch must not be reported as success: the later kernels
        // would otherwise post-process an output that was never generated.
        if (!kernel.run(1, &nthreads, NULL, false))
            return false;
    }

    // clip the prior's coordinate such that it is within [0, 1]
    if (_clip)
    {
        ocl::Kernel clipKernel("clip", ocl::dnn::prior_box_oclsrc, opts);
        size_t clipThreads = _layerHeight * _layerWidth * _numPriors * 4;
        if (!clipKernel.args((int)clipThreads, ocl::KernelArg::PtrReadWrite(outputs[0]))
                       .run(1, &clipThreads, NULL, false))
            return false;
    }

    // set the variance.
    {
        ocl::Kernel varianceKernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
        int offset = total(shape(outputs[0]), 2);
        size_t varianceThreads = _layerHeight * _layerWidth * _numPriors;
        varianceKernel.set(0, (int)varianceThreads);
        varianceKernel.set(1, (int)offset);
        varianceKernel.set(2, (int)_variance.size());
        varianceKernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
        varianceKernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
        if (!varianceKernel.run(1, &varianceThreads, NULL, false))
            return false;
    }
    return true;
}
400
#endif
401
402
// CPU forward pass.
// Tries the OpenCL path first on OpenCL targets; CV_16S inputs are handed to
// forward_fallback(). Otherwise expects exactly two inputs and fills
// outputs[0]: the first channel with box coordinates (optionally clipped to
// [0, 1]) and the second channel with the variances.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
               forward_ocl(inputs_arr, outputs_arr, internals_arr))

    if (inputs_arr.depth() == CV_16S)
    {
        forward_fallback(inputs_arr, outputs_arr, internals_arr);
        return;
    }

    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    CV_Assert(inputs.size() == 2);

    const int featureCols = inputs[0].size[3];
    const int featureRows = inputs[0].size[2];
    const int imageCols = inputs[1].size[3];
    const int imageRows = inputs[1].size[2];

    const size_t rowCount = static_cast<size_t>(featureRows);
    const size_t colCount = static_cast<size_t>(featureCols);

    // First channel: one box per (cell, box size, offset) combination.
    float* cursor = outputs[0].ptr<float>();
    for (size_t row = 0; row < rowCount; ++row)
    {
        for (size_t col = 0; col < colCount; ++col)
        {
            for (size_t b = 0; b < _boxWidths.size(); ++b)
            {
                const float boxW = _boxWidths[b];
                const float boxH = _boxHeights[b];
                for (size_t k = 0; k < _offsetsX.size(); ++k)
                {
                    const float center_x = (col + _offsetsX[k]) * _stepX;
                    const float center_y = (row + _offsetsY[k]) * _stepY;
                    cursor = addPrior(center_x, center_y, boxW, boxH, imageCols,
                                      imageRows, _bboxesNormalized, cursor);
                }
            }
        }
    }

    // clip the prior's coordinate such that it is within [0, 1]
    if (_clip)
    {
        const int clipCount = featureRows * featureCols * _numPriors * 4;
        float* coords = outputs[0].ptr<float>();
        for (size_t d = 0; d < static_cast<size_t>(clipCount); ++d)
        {
            float value = coords[d];
            if (value < 0.f)
                value = 0.f;
            if (1.f < value)
                value = 1.f;
            coords[d] = value;
        }
    }

    // set the variance.
    float* varianceOut = outputs[0].ptr<float>(0, 1);
    if (_variance.size() == 1)
    {
        // A single variance is broadcast over the whole second channel.
        Mat secondChannel(1, outputs[0].size[2], CV_32F, varianceOut);
        secondChannel.setTo(Scalar::all(_variance[0]));
    }
    else
    {
        // Four variances are repeated once per prior box.
        const size_t boxCount = rowCount * colCount * _numPriors;
        for (size_t box = 0; box < boxCount; ++box)
        {
            for (int j = 0; j < 4; ++j)
                *varianceOut++ = _variance[j];
        }
    }
}
484
485
// Creates the Vulkan backend node for this layer from the precomputed steps,
// offsets, box sizes and variances. Returns an empty pointer when OpenCV is
// built without HAVE_VULKAN.
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> priorBoxOp(
        new vkcom::OpPriorBox(_stepX, _stepY,
                              _clip, _numPriors,
                              _variance, _offsetsX,
                              _offsetsY, _boxWidths,
                              _boxHeights));
    return Ptr<BackendNode>(new VkComBackendNode(input, priorBoxOp));
#else
    return Ptr<BackendNode>();
#endif  // HAVE_VULKAN
}
497
498
// Creates the Inference Engine node for this layer ("PriorBoxClustered" when
// explicit box sizes are used, "PriorBox" otherwise) and serialises the layer
// settings into its string parameters. Requires a single offset that is equal
// for X and Y. Returns an empty pointer when built without HAVE_INF_ENGINE.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
    InferenceEngine::LayerParams lp;
    lp.name = name;
    lp.type = _explicitSizes ? "PriorBoxClustered" : "PriorBox";
    lp.precision = InferenceEngine::Precision::FP32;
    std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));

    if (_explicitSizes)
    {
        CV_Assert(!_boxWidths.empty()); CV_Assert(!_boxHeights.empty());
        CV_Assert(_boxWidths.size() == _boxHeights.size());

        // Comma-separated lists of the explicit box sizes.
        String widthList = format("%f", _boxWidths[0]);
        String heightList = format("%f", _boxHeights[0]);
        for (size_t k = 1; k < _boxWidths.size(); ++k)
        {
            widthList += format(",%f", _boxWidths[k]);
            heightList += format(",%f", _boxHeights[k]);
        }
        ieLayer->params["width"] = widthList;
        ieLayer->params["height"] = heightList;
    }
    else
    {
        ieLayer->params["min_size"] = format("%f", _minSize);
        ieLayer->params["max_size"] = _maxSize > 0 ? format("%f", _maxSize) : "";

        if (!_aspectRatios.empty())
        {
            String ratioList = format("%f", _aspectRatios[0]);
            for (size_t k = 1; k < _aspectRatios.size(); ++k)
                ratioList += format(",%f", _aspectRatios[k]);
            ieLayer->params["aspect_ratio"] = ratioList;
        }
    }

    ieLayer->params["flip"] = "0";  // We already flipped aspect ratios.
    ieLayer->params["clip"] = _clip ? "1" : "0";

    CV_Assert(!_variance.empty());
    String varianceList = format("%f", _variance[0]);
    for (size_t k = 1; k < _variance.size(); ++k)
        varianceList += format(",%f", _variance[k]);
    ieLayer->params["variance"] = varianceList;

    // Equal steps are passed as a single "step"; otherwise as step_h / step_w.
    const bool uniformStep = (_stepX == _stepY);
    if (uniformStep)
    {
        ieLayer->params["step"] = format("%f", _stepX);
        ieLayer->params["step_h"] = "0.0";
        ieLayer->params["step_w"] = "0.0";
    }
    else
    {
        ieLayer->params["step"] = "0.0";
        ieLayer->params["step_h"] = format("%f", _stepY);
        ieLayer->params["step_w"] = format("%f", _stepX);
    }

    CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
    ieLayer->params["offset"] = format("%f", _offsetsX[0]);

    return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
    return Ptr<BackendNode>();
#endif  // HAVE_INF_ENGINE
}
559
560
// Estimates the layer cost as the sum over all inputs of
// total(input, 2) * numPriors * 4. `outputs` is not used.
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                       const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
    CV_UNUSED(outputs); // suppress unused variable warning

    // Accumulate in the return type: `long` is only 32 bits wide on LLP64
    // platforms and would truncate large counts before they are returned.
    int64 flops = 0;

    for (size_t i = 0; i < inputs.size(); i++)
    {
        flops += total(inputs[i], 2) * _numPriors * 4;
    }

    return flops;
}
573
574
private:
575
float _minSize;
576
float _maxSize;
577
578
float _stepX, _stepY;
579
580
std::vector<float> _aspectRatios;
581
std::vector<float> _variance;
582
std::vector<float> _offsetsX;
583
std::vector<float> _offsetsY;
584
// Precomputed final widths and heights based on aspect ratios or explicit sizes.
585
std::vector<float> _boxWidths;
586
std::vector<float> _boxHeights;
587
588
#ifdef HAVE_OPENCL
589
UMat umat_offsetsX;
590
UMat umat_offsetsY;
591
UMat umat_widths;
592
UMat umat_heights;
593
UMat umat_variance;
594
#endif
595
596
bool _flip;
597
bool _clip;
598
bool _explicitSizes;
599
bool _bboxesNormalized;
600
601
size_t _numPriors;
602
603
static const size_t _numAxes = 4;
604
static const std::string _layerName;
605
606
// Writes one box as (xmin, ymin, xmax, ymax) into dst[0..3] and returns the
// position right after it (dst + 4).
//   normalized == true : corners are divided by the image width / height.
//   normalized == false: corners are kept in the units of the inputs and 1 is
//                        subtracted from xmax / ymax.
// NOTE(review): the "- 1" presumably yields inclusive pixel coordinates - confirm.
static float* addPrior(float center_x, float center_y, float width, float height,
                       float imgWidth, float imgHeight, bool normalized, float* dst)
{
    float xmin = center_x - width * 0.5f;
    float ymin = center_y - height * 0.5f;
    float xmax = center_x + width * 0.5f;
    float ymax = center_y + height * 0.5f;

    if (normalized)
    {
        xmin = xmin / imgWidth;
        ymin = ymin / imgHeight;
        xmax = xmax / imgWidth;
        ymax = ymax / imgHeight;
    }
    else
    {
        xmax = xmax - 1.0f;
        ymax = ymax - 1.0f;
    }

    dst[0] = xmin;
    dst[1] = ymin;
    dst[2] = xmax;
    dst[3] = ymax;
    return dst + 4;
}
625
};
626
627
// Layer name used as the prefix of parameter error messages.
const std::string PriorBoxLayerImpl::_layerName("PriorBox");
628
629
// Factory: constructs the PriorBox layer implementation from `params`.
Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
{
    Ptr<PriorBoxLayer> layer(new PriorBoxLayerImpl(params));
    return layer;
}
633
634
}
635
}
636
637