Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/dnn.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
15
//
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
18
//
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
21
//
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
25
//
26
// * The name of the copyright holders may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
28
//
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
39
//
40
//M*/
41
42
#include "precomp.hpp"
43
#include "op_halide.hpp"
44
#include "op_inf_engine.hpp"
45
#include "op_vkcom.hpp"
46
#include "halide_scheduler.hpp"
47
#include <set>
48
#include <algorithm>
49
#include <iostream>
50
#include <sstream>
51
#include <iterator>
52
#include <numeric>
53
#include <opencv2/dnn/shape_utils.hpp>
54
#include <opencv2/imgproc.hpp>
55
56
#include <opencv2/core/utils/configuration.private.hpp>
57
#include <opencv2/core/utils/logger.hpp>
58
59
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

// this option is useful to run valgrind memory errors detection
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

#ifdef HAVE_OPENCL
// Allows the OpenCL target to run on devices outside the default whitelist.
static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
#endif

// Backend substituted for DNN_BACKEND_DEFAULT; configurable through the
// OPENCV_DNN_BACKEND_DEFAULT environment variable. Falls back to the
// Inference Engine backend when OpenCV was built with it, OpenCV otherwise.
static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);

// Additional checks (slowdowns execution!)
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);

using std::vector;
using std::map;
using std::make_pair;
using std::set;
87
88
namespace
89
{
90
typedef std::vector<MatShape> ShapesVec;
91
92
struct LayerShapes
93
{
94
ShapesVec in, out, internal;
95
// No guarantees that layer which support in-place computations
96
// will be computed in-place (input.data_ptr == output.data_ptr).
97
// If layer said that it could work in-place and layers after it
98
// no longer use input blob, we'll set output = input.
99
bool supportInPlace;
100
LayerShapes() {supportInPlace = false;}
101
};
102
}
103
104
// Convenience overload of blobFromImage(): allocates the blob internally and
// returns it by value instead of filling a caller-provided OutputArray.
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                  const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat result;
    blobFromImage(image, result, scalefactor, size, mean, swapRB, crop, ddepth);
    return result;
}
112
113
// Wraps a single image into a one-element batch and delegates the real work
// to blobFromImages().
void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
                   const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> batch;
    batch.push_back(image.getMat());
    blobFromImages(batch, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}
120
121
// Convenience overload of blobFromImages(): allocates and returns the blob
// by value instead of writing through an OutputArray parameter.
Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat result;
    blobFromImages(images, result, scalefactor, size, mean, swapRB, crop, ddepth);
    return result;
}
129
130
// Converts a batch of 2D images into a single 4D NCHW blob. Each image is
// optionally resized (or resized-and-center-cropped) to 'size', converted
// from 8U to 32F when ddepth == CV_32F, mean-subtracted and scaled.
// NOTE: the images obtained from 'images_' are modified in place during
// preprocessing.
void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    if (ddepth == CV_8U)
    {
        // An 8-bit blob cannot represent scaled or mean-subtracted values.
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (int i = 0; i < images.size(); i++)
    {
        Size imgSize = images[i].size();
        if (size == Size())
            size = imgSize;  // no target size given: lock onto the first image's size
        if (size != imgSize)
        {
            if(crop)
            {
                // Scale so the image fully covers the target size, then take
                // a centered crop of exactly 'size'.
                float resizeFactor = std::max(size.width / (float)imgSize.width,
                                              size.height / (float)imgSize.height);
                resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                Rect crop(Point(0.5 * (images[i].cols - size.width),
                                0.5 * (images[i].rows - size.height)),
                          size);
                images[i] = images[i](crop);
            }
            else
                resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
        }
        if(images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);
        Scalar mean = mean_;
        if (swapRB)
            std::swap(mean[0], mean[2]);  // keep mean channel order consistent with the R/B swap below

        images[i] -= mean;
        images[i] *= scalefactor;
    }

    size_t i, nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    Mat image;
    if (nch == 3 || nch == 4)
    {
        // Multi-channel case: split each image into per-channel planes that
        // alias the blob's memory directly, so split() writes in place.
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for( i = 0; i < nimages; i++ )
        {
            image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for( int j = 0; j < nch; j++ )
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if(swapRB)
                std::swap(ch[0], ch[2]);  // channels 0 and 2 land in each other's planes
            split(image, ch);
        }
    }
    else
    {
        // Single-channel case: copy each image into its plane of the blob.
        CV_Assert(nch == 1);
        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for( i = 0; i < nimages; i++ )
        {
            Mat image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 1));
            CV_Assert(image.size() == image0.size());

            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
        }
    }
}
220
221
// Splits a 4D NCHW blob (CV_32F) back into one 2D image per batch item by
// merging the per-channel planes of each item into a single multi-channel Mat.
void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
    CV_TRACE_FUNCTION();

    // Blob layout: size[0] = batch, size[1] = channels,
    //              size[2] = height, size[3] = width.
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    const int numImages = blob_.size[0];
    const int numChannels = blob_.size[1];

    images_.create(cv::Size(1, numImages), blob_.depth());

    std::vector<Mat> channels(numChannels);
    for (int n = 0; n < numImages; ++n)
    {
        for (int c = 0; c < numChannels; ++c)
            channels[c] = getPlane(blob_, n, c);
        cv::merge(channels, images_.getMatRef(n));
    }
}
245
246
class OpenCLBackendWrapper : public BackendWrapper
247
{
248
public:
249
OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
250
{
251
m.copyTo(umat);
252
host = &m;
253
hostDirty = false;
254
}
255
256
OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
257
: BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
258
{
259
Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
260
CV_Assert(!base.empty());
261
262
host = &m;
263
264
int shape[] = {1, (int)base->umat.total()};
265
umat = base->umat.reshape(1, 2, &shape[0])
266
.colRange(0, host->total())
267
.reshape(1, host->dims, &host->size[0]);
268
hostDirty = false;
269
}
270
271
static Ptr<BackendWrapper> create(Mat& m)
272
{
273
return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
274
}
275
276
static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
277
{
278
return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
279
}
280
281
static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
282
{
283
const int numWrappers = wrappers.size();
284
std::vector<UMat> mats(wrappers.size());
285
for (int i = 0; i < numWrappers; ++i)
286
{
287
Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
288
CV_Assert(!umatWrapper.empty());
289
umatWrapper->copyToDevice();
290
mats[i] = umatWrapper->umat;
291
}
292
return mats;
293
}
294
295
// Replaces all umats in wrappers to specific ones.
296
static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
297
const std::vector<UMat>& umats)
298
{
299
CV_Assert(wrappers.size() == umats.size());
300
for (int i = 0, n = umats.size(); i < n; ++i)
301
{
302
Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
303
CV_Assert(!umatWrapper.empty());
304
umatWrapper->umat = umats[i];
305
}
306
}
307
308
~OpenCLBackendWrapper() {}
309
310
// Copies data from device to a host memory.
311
virtual void copyToHost() CV_OVERRIDE
312
{
313
umat.copyTo(*host);
314
}
315
316
virtual void setHostDirty() CV_OVERRIDE
317
{
318
hostDirty = true;
319
};
320
321
void copyToDevice()
322
{
323
if (hostDirty)
324
{
325
host->copyTo(umat);
326
hostDirty = false;
327
}
328
}
329
330
private:
331
UMat umat;
332
Mat* host;
333
bool hostDirty;
334
};
335
336
// Identifies one output blob of one layer: 'lid' is the layer id and 'oid'
// is the index of the output within that layer. Used as an ordered map key
// by the blob-reuse bookkeeping.
struct LayerPin
{
    int lid;
    int oid;

    LayerPin(int layerId = -1, int outputId = -1)
        : lid(layerId), oid(outputId) {}

    // True when both ids refer to an actual layer output (non-negative).
    bool valid() const
    {
        return (lid >= 0 && oid >= 0);
    }

    bool equal(const LayerPin &r) const
    {
        return (lid == r.lid && oid == r.oid);
    }

    // Lexicographic ordering by (lid, oid). Parentheses added around the
    // '&&' term: '&&' already binds tighter than '||', so behavior is
    // unchanged, but the explicit grouping silences -Wparentheses and makes
    // the intent obvious.
    bool operator<(const LayerPin &r) const
    {
        return lid < r.lid || (lid == r.lid && oid < r.oid);
    }

    bool operator ==(const LayerPin &r) const
    {
        return lid == r.lid && oid == r.oid;
    }
};
364
365
// Per-layer bookkeeping used by Net::Impl: identity, parameters, graph
// connectivity, blob storage and backend-specific compiled nodes.
struct LayerData
{
    LayerData() : id(-1), skip(false), flag(0) {}
    LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
        : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
    {
        CV_TRACE_FUNCTION();

        //add logging info
        params.name = name;
        params.type = type;
    }

    int id;              // layer id inside the network (0 is the fake input layer)
    String name;
    String type;
    LayerParams params;

    std::vector<LayerPin> inputBlobsId;  // pins of the blobs this layer consumes
    std::set<int> inputLayersId;         // ids of layers feeding this one
    std::set<int> requiredOutputs;
    std::vector<LayerPin> consumers;     // pins of layers consuming this layer's outputs
    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;

    Ptr<Layer> layerInstance;            // lazily created by getLayerInstance()
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode> > backendNodes;
    // Flag for skip layer computation for specific backend.
    bool skip;

    int flag;

    // Creates the Layer implementation through LayerFactory on first use and
    // caches it; throws StsError when no factory is registered for 'type'.
    Ptr<Layer> getLayerInstance()
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        if (layerInstance)
            return layerInstance;

        layerInstance = LayerFactory::createLayerInstance(type, params);
        if (!layerInstance)
        {
            CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
        }

        return layerInstance;
    }
};
419
420
//fake layer containing network input blobs
struct DataLayer : public Layer
{
    DataLayer() : Layer()
    {
        skip = false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // NOTE: '&&' binds tighter than '||', so the Inference Engine backend
        // is only reported when the network has exactly one input.
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1;
    }

    // Applies per-input preprocessing (scale and mean subtraction) while
    // copying inputsData into the output blobs.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // fp16 outputs are delegated to the generic fallback implementation.
        if (outputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        // Supported modes:
        // | Input type | Output type |
        // |    fp32    |    fp32     |
        // |    uint8   |    fp32     |
        for (int i = 0; i < inputsData.size(); ++i)
        {
            double scale = scaleFactors[i];
            Scalar& mean = means[i];
            // Per-channel mean is only supported for up to 4 channels.
            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            // If every channel shares the same mean, one convertTo() suffices.
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
            }
            else
            {
                // Different per-channel means: convert plane by plane.
                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                    {
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                    }
            }
        }
    }

#ifdef HAVE_OPENCL
    // Keeps the Mats produced by matrix expressions alive until convertFp16()
    // has consumed them (the expressions below create temporaries).
    std::vector<Mat> tmp_expressions;
    // OpenCL flavor of forward(): same preprocessing, plus an fp32 -> fp16
    // conversion when the output blobs are half precision.
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        // Supported modes:
        // | Input type | Output type |
        // |    fp32    |    fp32     |
        // |    fp32    |    fp16     |
        // |    uint8   |    fp32     |
        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        tmp_expressions.clear();
        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            // If every channel shares the same mean, a single expression suffices.
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (outputs_.depth() == CV_16S)
            {
                // Half-precision outputs: compute in fp32, then convert.
                if (singleMean)
                {
                    tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
                    convertFp16(tmp_expressions.back(), outputs[i]);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            // View of the (n, c) plane inside the output blob.
                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            tmp_expressions.push_back(scale * (inp - mean[c]));
                            convertFp16(tmp_expressions.back(), out);
                        }
                }
            }
            else
            {
                CV_Assert(outputs_.depth() == CV_32F);
                if (singleMean)
                {
                    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            // View of the (n, c) plane inside the output blob.
                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                }
            }
        }
        return true;
    }
#endif

    // Returns the index of 'tgtName' inside outNames, or -1 when absent.
    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
        int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
        return (idx < (int)outNames.size()) ? idx : -1;
    }

    void setNames(const std::vector<String> &names)
    {
        outNames.assign(names.begin(), names.end());
    }

    // The input layer forwards its inputs: output shapes equal input shapes.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == requiredOutputs);
        outputs.assign(inputs.begin(), inputs.end());
        return false;
    }

    // The whole layer can be skipped when every output already aliases its
    // input and no scaling / mean subtraction was requested.
    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                    inputsData.size() == outputs.size());
        skip = true;
        for (int i = 0; skip && i < inputsData.size(); ++i)
        {
            if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
                skip = false;
        }
    }

    // Expresses the preprocessing as an Inference Engine ScaleShift layer:
    // weights = scale factor, biases = -mean * scale. Only networks with a
    // single 4D input (<= 4 channels) are supported here.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::LayerParams lp;
        lp.name = name;
        lp.type = "ScaleShift";
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));

        CV_CheckEQ(inputsData.size(), (size_t)1, "");
        CV_CheckEQ(inputsData[0].dims, 4, "");
        const size_t numChannels = inputsData[0].size[1];
        CV_Assert(numChannels <= 4);

        // Scale
        auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
                                                                {numChannels});
        weights->allocate();
        weights->set(std::vector<float>(numChannels, scaleFactors[0]));
        ieLayer->_weights = weights;

        // Mean subtraction
        auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
                                                               {numChannels});
        biases->allocate();
        std::vector<float> biasesVec(numChannels);
        for (int i = 0; i < numChannels; ++i)
        {
            biasesVec[i] = -means[0][i] * scaleFactors[0];
        }
        biases->set(biasesVec);
        ieLayer->_biases = biases;

        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }

    std::vector<String> outNames;
    // Preprocessing parameters for each network's input.
    std::vector<double> scaleFactors;
    std::vector<Scalar> means;
    std::vector<Mat> inputsData;
    bool skip;  // true when forward() would be a no-op (set by finalize())
};
646
647
// Tracks reference counts on layer output blobs and reuses the memory of
// blobs whose consumers have all finished, reducing peak memory usage.
// Disabled globally via OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS.
struct BlobManager
{
public:
    // Increase references counter to layer output.
    void addReference(const LayerPin& lp)
    {
        std::map<LayerPin, int>::iterator it = refCounter.find(lp);
        if (it == refCounter.end())
            refCounter[lp] = 1;
        else
            it->second += 1;
    }

    void addReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            addReference(pins[i]);
        }
    }

    // Returns number of references to allocated memory that used in specific
    // layer blob.
    int numReferences(const LayerPin& lp)
    {
        // Resolve the pin to the blob that actually owns the memory.
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());
        LayerPin memHost = mapIt->second;

        std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
        CV_Assert(refIt != refCounter.end());
        return refIt->second;
    }

    // Reuse data allocated in <host> inside the <user> blob.
    void reuse(const LayerPin& host, const LayerPin& user)
    {
        CV_Assert(reuseMap.find(user) == reuseMap.end());
        CV_Assert(reuseMap.find(host) != reuseMap.end());
        LayerPin memHost = reuseMap[host];
        reuseMap[user] = memHost;
        if (refCounter.find(memHost) != refCounter.end())
        {
            // Fold the user's own reference count into the memory owner's.
            std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
            if (userRefIt != refCounter.end())
            {
                refCounter[memHost] += userRefIt->second;
                refCounter.erase(userRefIt);
            }
            else
                refCounter[memHost] += 1;
        }
    }

    // Decrease references counter to allocated memory inside specific blob.
    void releaseReference(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());

        std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
        CV_Assert(refIt != refCounter.end());
        CV_Assert(refIt->second > 0);
        refIt->second -= 1;
    }

    void releaseReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            releaseReference(pins[i]);
        }
    }

    // Binds 'dst' either to an existing unreferenced blob of sufficient size
    // (smallest fit wins) or, failing that, to freshly allocated memory.
    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
    {
        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
        {
            Mat bestBlob;
            LayerPin bestBlobPin;

            std::map<LayerPin, Mat>::iterator hostIt;
            std::map<LayerPin, int>::iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that had references before because if not,
                // it might be used as output.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    Mat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal &&
                        unusedBlob.total() < bestBlobTotal)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
            if (!bestBlob.empty())
            {
                // Reuse found: expose the leading targetTotal elements of the
                // donor blob, reshaped to the requested shape.
                reuse(bestBlobPin, lp);
                dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
                return;
            }
        }

        {
            // if dst already has been allocated with total(shape) elements,
            // it won't be recreated and pointer of dst.data remains the same.
            dst.create(shape, use_half ? CV_16S : CV_32F);
            addHost(lp, dst);
        }
    }

    // Allocates (or reuses memory for) the output and internal blobs of 'ld'
    // according to 'layerShapes', processing the biggest blobs first. Fills
    // 'pinsForInternalBlobs' with pins for the non-empty internal blobs.
    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                               std::vector<LayerPin>& pinsForInternalBlobs,
                               bool use_half = false)
    {
        CV_TRACE_FUNCTION();

        pinsForInternalBlobs.clear();

        std::vector<Mat>& outputBlobs = ld.outputBlobs,
                &internalBlobs = ld.internals;

        const ShapesVec& outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
        internalBlobs.resize(internalShapes.size());

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

        // Check that layer could work in-place.
        bool inPlace = false;
        if (layerShapes.supportInPlace)
        {
            if (ld.inputBlobs.size() == 1)
            {
                // Get number of references to the input memory.
                int numRef = numReferences(ld.inputBlobsId[0]);
                // If current layer is one and only customer of this blob.
                inPlace = numRef == 1;
            }
        }

        // Output shapes first, then internal shapes; 'blobs' holds matching
        // destination pointers in the same order.
        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        for(int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
        }

        for(int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
            }
        }

        addReferences(pinsForInternalBlobs);

        // Group blob indices by element count so allocation proceeds from the
        // largest to the smallest (better reuse of large donor buffers).
        std::map<int, std::vector<int> > idxSizes;
        for(int i = 0; i < shapes.size(); i++)
        {
            idxSizes[total(shapes[i])].push_back(i);
        }

        std::map<int, std::vector<int> >::reverse_iterator it;
        for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for(int j = 0; j < it->second.size(); j++)
            {
                int index = it->second[j];
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
                    if (index < outShapes.size() && inPlace)
                    {
                        // In-place: the output is a reshaped view of the input.
                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                        reuse(ld.inputBlobsId[0], blobPin);
                    }
                    else
                        reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
                }
            }
        }
    }

    // Clear internal state. Calls before an every reallocation.
    void reset()
    {
        CV_TRACE_FUNCTION();

        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
    }

private:
    // Register allocated memory.
    void addHost(const LayerPin& lp, const Mat& mat)
    {
        CV_Assert(memHosts.find(lp) == memHosts.end());
        reuseMap[lp] = lp;
        memHosts[lp] = mat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps pin to origin blob (for whom memory was allocated firstly).
    // For origin blobs key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
};
871
872
// Creates a fresh backend-specific wrapper around the host Mat 'm'.
// Returns an empty Ptr for the plain CPU path (which uses Mat directly) and
// when a backend's support was not compiled in (the #ifdef'd returns fall
// through to the final 'return Ptr<BackendWrapper>()').
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
    if (backendId == DNN_BACKEND_OPENCV)
    {
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);
        else
            CV_Error(Error::StsNotImplemented, "Unknown target identifier");
    }
    else if (backendId == DNN_BACKEND_HALIDE)
    {
        CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif  // HAVE_HALIDE
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
    {
        CV_Assert(haveInfEngine());
#ifdef HAVE_INF_ENGINE
        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
#endif  // HAVE_INF_ENGINE
    }
    else if (backendId == DNN_BACKEND_VKCOM)
    {
        CV_Assert(haveVulkan());
#ifdef HAVE_VULKAN
        return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
#endif  // HAVE_VULKAN
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>();
}
908
909
struct Net::Impl
910
{
911
typedef std::map<int, LayerShapes> LayersShapesMap;
912
typedef std::map<int, LayerData> MapIdToLayerData;
913
914
// Destructor: with Vulkan, the device's child objects must be released
// explicitly before the device itself, hence the ordered clears below.
~Impl()
{
#ifdef HAVE_VULKAN
    // Vulkan requires explicit releasing the child objects of
    // VkDevice object prior to releasing VkDevice object itself.
    layers.clear();
    backendWrappers.clear();
    vkcom::deinitPerThread();
#endif
}
924
// Constructor: registers the implicit input layer (id 0, name "_input")
// and initializes default backend/target settings.
Impl()
{
#ifdef HAVE_VULKAN
    vkcom::initPerThread();
#endif
    //allocate fake net input layer
    netInputLayer = Ptr<DataLayer>(new DataLayer());
    LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
    inpl.id = 0;
    netInputLayer->name = inpl.name = "_input";
    inpl.type = "__NetInputLayer__";
    inpl.layerInstance = netInputLayer;
    layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

    lastLayerId = 0;
    netWasAllocated = false;
    fusion = true;
    preferableBackend = DNN_BACKEND_DEFAULT;  // resolved to a concrete backend in setUpNet()
    preferableTarget = DNN_TARGET_CPU;
    skipInfEngineInit = false;
}
945
946
Ptr<DataLayer> netInputLayer;        // fake layer holding the network inputs (id 0)
std::vector<LayerPin> blobsToKeep;   // outputs that must survive blob-memory reuse
MapIdToLayerData layers;             // all layers, keyed by layer id
std::map<String, int> layerNameToId;
BlobManager blobManager;
int preferableBackend;
int preferableTarget;
String halideConfigFile;             // optional scheduling config for the Halide backend
bool skipInfEngineInit;
// Map host data to backend specific wrapper.
std::map<void*, Ptr<BackendWrapper> > backendWrappers;

int lastLayerId;

bool netWasAllocated;                // true once setUpNet() has completed
bool fusion;                         // layer-fusion optimization enabled
std::vector<int64> layersTimings;
Mat output_blob;
964
965
// Returns a backend/target-specific wrapper for 'host'. Wrappers are cached
// per data pointer: the first request for a given buffer allocates and is
// cached; later requests for the same data share the cached base buffer.
Ptr<BackendWrapper> wrap(Mat& host)
{
    if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
        return Ptr<BackendWrapper>();  // plain CPU path uses Mat directly

    MatShape shape(host.dims);
    for (int i = 0; i < host.dims; ++i)
        shape[i] = host.size[i];

    void* data = host.data;
    if (backendWrappers.find(data) != backendWrappers.end())
    {
        // A wrapper for this memory already exists: create a view on top of it.
        Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
        if (preferableBackend == DNN_BACKEND_OPENCV)
        {
            CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
            return OpenCLBackendWrapper::create(baseBuffer, host);
        }
        else if (preferableBackend == DNN_BACKEND_HALIDE)
        {
            CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
            return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif  // HAVE_HALIDE
        }
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
        {
            // IE wrappers are recreated rather than derived from the base buffer.
            return wrapMat(preferableBackend, preferableTarget, host);
        }
        else if (preferableBackend == DNN_BACKEND_VKCOM)
        {
#ifdef HAVE_VULKAN
            return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
#endif
        }
        else
            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    }

    // First wrapper for this memory: create and cache it.
    Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
    backendWrappers[data] = wrapper;
    return wrapper;
}
1008
1009
#ifdef HAVE_HALIDE
1010
// Schedules and compiles every Halide node in the network. Each layer gets a
// schedule from the user-supplied config file when one matches, otherwise the
// layer's automatic scheduler; compilation itself is spread over a small pool
// of worker threads (including the calling thread).
void compileHalide()
{
    CV_TRACE_FUNCTION();

    CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

    HalideScheduler scheduler(halideConfigFile);
    std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
    {
        LayerData &ld = it->second;
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
        {
            CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
            bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
            if (!scheduled)
            {
                // Use automatic scheduling provided by layer.
                layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                            ld.inputBlobs, ld.outputBlobs,
                                            preferableTarget);
            }
            compileList.emplace_back(ld);
        }
    }
    // Worker body: atomically claims the next layer index until the list is
    // drained; safe to run concurrently from several threads.
    std::atomic<int> progress(0);
    auto fn = ([&] () -> void
    {
        for (;;)
        {
            int id = progress.fetch_add(1);
            if ((size_t)id >= compileList.size())
                return;
            const LayerData& ld = compileList[id].get();
            Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
            dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
        }
    });
    // Clamp worker count to [1, 8]; the current thread also runs 'fn', so
    // only num_threads - 1 extra threads are spawned.
    size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
    num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
    std::vector<std::thread> threads(num_threads - 1);
    for (auto& t: threads) t = std::thread(fn);
    fn(); // process own tasks
    for (auto& t: threads) t.join();
}
1056
#endif
1057
1058
void clear()
1059
{
1060
CV_TRACE_FUNCTION();
1061
1062
MapIdToLayerData::iterator it;
1063
for (it = layers.begin(); it != layers.end(); it++)
1064
{
1065
if (it->second.id != 0) {
1066
it->second.inputBlobs.clear();
1067
it->second.outputBlobs.clear();
1068
it->second.internals.clear();
1069
}
1070
it->second.skip = false;
1071
//it->second.consumers.clear();
1072
Ptr<Layer> currLayer = it->second.layerInstance;
1073
1074
if( currLayer.empty() )
1075
continue;
1076
1077
currLayer->unsetAttached();
1078
1079
Ptr<PoolingLayer> poolingLayer = currLayer.dynamicCast<PoolingLayer>();
1080
if( !poolingLayer.empty() )
1081
{
1082
poolingLayer->computeMaxIdx = true;
1083
}
1084
}
1085
1086
layersTimings.clear();
1087
}
1088
1089
    // (Re)allocates the whole network when needed: validates the requested
    // backend/target pair, downgrades to a safe target when the requested one
    // is unavailable in this build/device, then clears previous state,
    // allocates all layers and initializes the chosen backend.
    // blobsToKeep_ lists pins whose output blobs must not be recycled by the
    // blob manager (e.g. blobs requested by the user).
    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
    {
        CV_TRACE_FUNCTION();

        if (preferableBackend == DNN_BACKEND_DEFAULT)
            preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;

        // Each backend supports only a subset of targets; fail fast on
        // combinations that can never work.
        CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16);
        CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL);
        CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                  preferableTarget == DNN_TARGET_MYRIAD);
        CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
                  preferableTarget == DNN_TARGET_VULKAN);
        // Re-setup only when nothing was allocated yet or the kept-blob set changed.
        if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
        {
            // OpenCL sanity fallbacks: the braces below belong to the same `if`
            // but their contents are chosen at compile time.
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
#ifndef HAVE_OPENCL
            {
                CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
                preferableTarget = DNN_TARGET_CPU;
            }
#else
            {
                if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                {
                    // Current implementation is only valid for GPU (#11494)
                    if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
                    {
                        CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
                        preferableTarget = DNN_TARGET_CPU;
                    }
                    else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
                    {
                        CV_LOG_WARNING(NULL,
                                       "DNN: OpenCL target with fp16 precision is not supported "
                                       "with current OpenCL device (tested with Intel GPUs only), "
                                       "switching to OpenCL with fp32 precision.");
                        preferableTarget = DNN_TARGET_OPENCL;
                    }
                }
            }
#endif
            // Vulkan requested but unusable at runtime: fall back to default CPU path.
            if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
            {
                preferableBackend = DNN_BACKEND_OPENCV;
                preferableTarget = DNN_TARGET_CPU;
            }

            clear();

            allocateLayers(blobsToKeep_);

            // Propagate the input layer's skip flag to its LayerData entry (id 0).
            MapIdToLayerData::iterator it = layers.find(0);
            CV_Assert(it != layers.end());
            it->second.skip = netInputLayer->skip;

            initBackend();

            if (!netWasAllocated )
            {
#ifdef HAVE_HALIDE
                if (preferableBackend == DNN_BACKEND_HALIDE)
                    compileHalide();
#else
                CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
#endif
            }

            netWasAllocated = true;
            this->blobsToKeep = blobsToKeep_;
        }
    }
1169
1170
int getLayerId(const String &layerName)
1171
{
1172
std::map<String, int>::iterator it = layerNameToId.find(layerName);
1173
return (it != layerNameToId.end()) ? it->second : -1;
1174
}
1175
1176
int getLayerId(int id)
1177
{
1178
MapIdToLayerData::iterator it = layers.find(id);
1179
return (it != layers.end()) ? id : -1;
1180
}
1181
1182
int getLayerId(DictValue &layerDesc)
1183
{
1184
if (layerDesc.isInt())
1185
return getLayerId(layerDesc.get<int>());
1186
else if (layerDesc.isString())
1187
return getLayerId(layerDesc.get<String>());
1188
1189
CV_Assert(layerDesc.isInt() || layerDesc.isString());
1190
return -1;
1191
}
1192
1193
String getLayerName(int id)
1194
{
1195
MapIdToLayerData::iterator it = layers.find(id);
1196
return (it != layers.end()) ? it->second.name : "(unknown layer)";
1197
}
1198
1199
LayerData& getLayerData(int id)
1200
{
1201
MapIdToLayerData::iterator it = layers.find(id);
1202
1203
if (it == layers.end())
1204
CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1205
1206
return it->second;
1207
}
1208
1209
LayerData& getLayerData(const String &layerName)
1210
{
1211
int id = getLayerId(layerName);
1212
1213
if (id < 0)
1214
CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1215
1216
return getLayerData(id);
1217
}
1218
1219
LayerData& getLayerData(const DictValue &layerDesc)
1220
{
1221
CV_Assert(layerDesc.isInt() || layerDesc.isString());
1222
if (layerDesc.isInt())
1223
return getLayerData(layerDesc.get<int>());
1224
else /*if (layerDesc.isString())*/
1225
return getLayerData(layerDesc.get<String>());
1226
}
1227
1228
static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1229
{
1230
if ((int)ld.inputBlobsId.size() <= inNum)
1231
{
1232
ld.inputBlobsId.resize(inNum + 1);
1233
}
1234
else
1235
{
1236
LayerPin storedFrom = ld.inputBlobsId[inNum];
1237
if (storedFrom.valid() && !storedFrom.equal(from))
1238
CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1239
inNum, ld.name.c_str()));
1240
}
1241
1242
ld.inputBlobsId[inNum] = from;
1243
}
1244
1245
int resolvePinOutputName(LayerData &ld, const String &outName)
1246
{
1247
if (outName.empty())
1248
return 0;
1249
return ld.getLayerInstance()->outputNameToIndex(outName);
1250
}
1251
1252
LayerPin getPinByAlias(const String &layerName)
1253
{
1254
LayerPin pin;
1255
pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1256
1257
if (pin.lid >= 0)
1258
pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1259
1260
return pin;
1261
}
1262
1263
std::vector<LayerPin> getLayerOutPins(const String &layerName)
1264
{
1265
int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1266
1267
std::vector<LayerPin> pins;
1268
1269
for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1270
{
1271
pins.push_back(LayerPin(lid, i));
1272
}
1273
1274
return pins;
1275
}
1276
1277
    // Wires output pin (outLayerId, outNum) into input slot inNum of layer
    // inLayerId, and records the dependency on both sides. Layers must
    // already be in topological order: producer id < consumer id.
    void connect(int outLayerId, int outNum, int inLayerId, int inNum)
    {
        CV_Assert(outLayerId < inLayerId);
        LayerData &ldOut = getLayerData(outLayerId);
        LayerData &ldInp = getLayerData(inLayerId);

        addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
        ldOut.requiredOutputs.insert(outNum);
        // NOTE(review): the consumer pin is built with outNum (the producer's
        // output index), not inNum — presumably only the .lid field of
        // consumers is relied upon downstream; confirm before changing.
        ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
    }
1287
1288
void initBackend()
1289
{
1290
CV_TRACE_FUNCTION();
1291
if (preferableBackend == DNN_BACKEND_OPENCV)
1292
CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1293
else if (preferableBackend == DNN_BACKEND_HALIDE)
1294
initHalideBackend();
1295
else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1296
initInfEngineBackend();
1297
else if (preferableBackend == DNN_BACKEND_VKCOM)
1298
initVkComBackend();
1299
else
1300
CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1301
}
1302
1303
    // Creates Halide backend nodes for all supported layers and greedily fuses
    // chains of in-place layers (e.g. conv+bn+relu) into a single node by
    // attaching each in-place layer onto the node of its "base" layer.
    void initHalideBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());

        // Iterator to current layer.
        MapIdToLayerData::iterator it = layers.begin();
        // Iterator to base layer for fusion. In example, in case of conv+bn+relu
        // it'll be a conv layer.
        MapIdToLayerData::iterator baseIt = layers.begin();
        for (; it != layers.end(); it++)
        {
            LayerData &ldTop = it->second;
            Ptr<Layer> layerTop = ldTop.layerInstance;
            if (!layerTop->supportBackend(preferableBackend))
            {
                // Move base iterator to layer that don't support preferable
                // backend to prevent fusion over layer of different backend.
                baseIt = it;
                continue;
            }
            // Try to do layers fusion.
            LayerData &ldBot = baseIt->second;
            Ptr<Layer> layerBot = ldBot.layerInstance;
            // 1. Check that bottom and top from the same backends.
            if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
            {
                // 2. Check that current layer works in-place: one input whose
                // data pointer is the base layer's single output.
                bool inPlace = ldTop.inputBlobs.size() == 1 &&
                               ldBot.outputBlobs.size() == 1 &&
                               ldTop.inputBlobs[0]->data ==
                               ldBot.outputBlobs[0].data;
                if (inPlace)
                {
                    // 3. Try to attach node.
                    CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
                    Ptr<BackendNode> fusedNode =
                        layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
                    if (!fusedNode.empty())
                    {
                        // Fusion succeeded: the top layer is skipped and the base
                        // node now produces the top layer's outputs.
                        ldTop.skip = true;
                        ldBot.backendNodes[preferableBackend] = fusedNode;
                        ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
                        continue;
                    }
                }
            }
            // No layers fusion: create a fresh node and make this layer the new base.
            ldTop.skip = false;
            ldTop.backendNodes[DNN_BACKEND_HALIDE] =
                layerTop->initHalide(ldTop.inputBlobsWrappers);
            baseIt = it;
        }
    }
1357
1358
#ifdef HAVE_INF_ENGINE
1359
// Before launching Inference Engine graph we need to specify output blobs.
1360
// This function requests output blobs based on inputs references of
1361
// layers from default backend or layers from different graphs.
1362
void addInfEngineNetOutputs(LayerData &ld)
1363
{
1364
Ptr<InfEngineBackendNet> layerNet;
1365
if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1366
{
1367
Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1368
if (!node.empty())
1369
{
1370
Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1371
CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1372
layerNet = ieNode->net;
1373
}
1374
}
1375
// For an every input reference we check that it belongs to one of
1376
// the Inference Engine backend graphs. Request an output blob if it is.
1377
// Do nothing if layer's input is from the same graph.
1378
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1379
{
1380
LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1381
Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1382
if (!inpNode.empty())
1383
{
1384
Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1385
CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1386
if (layerNet != ieInpNode->net)
1387
{
1388
// layerNet is empty or nodes are from different graphs.
1389
ieInpNode->net->addOutput(ieInpNode->layer->name);
1390
}
1391
}
1392
}
1393
}
1394
#endif // HAVE_INF_ENGINE
1395
1396
    // Creates Vulkan compute backend nodes for every layer that supports the
    // VKCOM backend. Layers without support are simply left on the default
    // path (no fusion is attempted here).
    void initVkComBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);
#ifdef HAVE_VULKAN
        // Runtime check: the build has Vulkan, but the device/loader may not.
        if (!haveVulkan())
            return;

        MapIdToLayerData::iterator it = layers.begin();
        for (; it != layers.end(); it++)
        {
            LayerData &ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (!layer->supportBackend(preferableBackend))
            {
                continue;
            }

            ld.skip = false;
            ld.backendNodes[DNN_BACKEND_VKCOM] =
                layer->initVkCom(ld.inputBlobsWrappers);
        }
#endif
    }
1420
1421
    // Builds Inference Engine (OpenVINO) networks for the model. Supported
    // layers are grouped into IE sub-networks; unsupported layers split the
    // model and run on the default CPU path. Also names IE data nodes,
    // converts weights to FP16 for FP16/MYRIAD targets, and finally
    // initializes every constructed network.
    void initInfEngineBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine());
#ifdef HAVE_INF_ENGINE
        MapIdToLayerData::iterator it;
        Ptr<InfEngineBackendNet> net;

        // Pass 1: assign a name to every IE data node — network input names
        // for the data layer (id 0), the layer's own name otherwise.
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0)
            {
                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
                    dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
                }
            }
            else
            {
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
                    dataPtr->name = ld.name;
                }
            }
        }

        // Fast path: the model was imported as a ready IE network (see
        // skipInfEngineInit); just register blobs, skip all layers except the
        // last one, and initialize that single network.
        if (skipInfEngineInit)
        {
            Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
            CV_Assert(!node.empty());

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());

            for (it = layers.begin(); it != layers.end(); ++it)
            {
                LayerData &ld = it->second;
                if (ld.id == 0)
                {
                    for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
                        dataPtr->name = netInputLayer->outNames[i];
                    }
                }
                else
                {
                    for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
                        dataPtr->name = ld.name;
                    }
                }
                ieNode->net->addBlobs(ld.inputBlobsWrappers);
                ieNode->net->addBlobs(ld.outputBlobsWrappers);
                ld.skip = true;
            }
            layers[lastLayerId].skip = false;
            ieNode->net->init(preferableTarget);
            return;
        }

        // Build Inference Engine networks from sets of layers that support this
        // backend. Split a whole model on several Inference Engine networks if
        // some of layers is not implemented.

        // Set of all input and output blobs wrappers for current network.
        std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0 && ld.skip)
                continue;
            // A layer already marked skip here was fused into a previous node.
            bool fused = ld.skip;

            Ptr<Layer> layer = ld.layerInstance;
            if (!fused && !layer->supportBackend(preferableBackend))
            {
                // Unsupported layer: close the current IE sub-network and let
                // this layer run on CPU.
                addInfEngineNetOutputs(ld);
                net = Ptr<InfEngineBackendNet>();
                netBlobsWrappers.clear();
                layer->preferableTarget = DNN_TARGET_CPU;
                continue;
            }
            ld.skip = true; // Initially skip all Inference Engine supported layers.

            // Create a new network if one of inputs from different Inference Engine graph.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty())
                {
                    Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                    if (ieInpNode->net != net)
                    {
                        net = Ptr<InfEngineBackendNet>();
                        netBlobsWrappers.clear();
                        break;
                    }
                }
            }

            // The same blobs wrappers cannot be shared between two Inference Engine
            // networks because of explicit references between layers and blobs.
            // So we need to rewrap all the external blobs.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerPin inPin = ld.inputBlobsId[i];
                auto it = netBlobsWrappers.find(inPin);
                if (it == netBlobsWrappers.end())
                {
                    ld.inputBlobsWrappers[i] = InfEngineBackendWrapper::create(ld.inputBlobsWrappers[i]);
                    netBlobsWrappers[inPin] = ld.inputBlobsWrappers[i];
                }
                else
                    ld.inputBlobsWrappers[i] = it->second;
            }
            netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];

            Ptr<BackendNode> node;
            if (!net.empty())
            {
                if (fused)
                {
                    // Fused layers must be strictly in-place to reuse the
                    // producer's node and wrappers.
                    bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                                   ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
                    CV_Assert(inPlace);
                    node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                    ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
                }
            }
            else
                net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());

            if (!fused)
            {
                node = layer->initInfEngine(ld.inputBlobsWrappers);
            }
            else if (node.empty())
                continue;

            CV_Assert(!node.empty());
            ld.backendNodes[preferableBackend] = node;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());
            ieNode->net = net;

            // FP16 conversion for FP16/MYRIAD targets: convert weights/biases
            // of weightable layers directly, otherwise convert the raw blobs.
            auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD) && !fused)
            {
                ieNode->layer->precision = InferenceEngine::Precision::FP16;
                if (weightableLayer)
                {
                    if (weightableLayer->_weights)
                        weightableLayer->_weights = convertFp16(weightableLayer->_weights);
                    if (weightableLayer->_biases)
                        weightableLayer->_biases = convertFp16(weightableLayer->_biases);
                }
                else
                {
                    for (const auto& weights : {"weights", "biases"})
                    {
                        auto it = ieNode->layer->blobs.find(weights);
                        if (it != ieNode->layer->blobs.end())
                            it->second = convertFp16(it->second);
                    }
                }
            }
            // Mirror weights/biases into the generic blobs map.
            if (weightableLayer)
            {
                if (weightableLayer->_weights)
                    weightableLayer->blobs["weights"] = weightableLayer->_weights;
                if (weightableLayer->_biases)
                    weightableLayer->blobs["biases"] = weightableLayer->_biases;
            }
            ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers);
            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);

            if (!fused)
                net->addLayer(ieNode->layer);
            addInfEngineNetOutputs(ld);
        }

        // Initialize all networks.
        std::set<InfEngineBackendNet> initializedNets;
        for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
                continue;

            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
            if (node.empty())
                continue;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            if (ieNode.empty())
                continue;

            CV_Assert(!ieNode->net.empty());

            // The layer whose node triggers initialization of its network
            // keeps running (skip = false) so the IE graph is launched from it.
            if (!ieNode->net->isInitialized())
            {
                ieNode->net->init(preferableTarget);
                ld.skip = false;
            }
        }
#endif  // HAVE_INF_ENGINE
    }
1639
1640
    // Allocates one layer (recursively allocating its parents first):
    // binds input blobs to producers' outputs, asks the blob manager for
    // output/internal storage, wraps blobs for the chosen backend, and calls
    // the layer's finalize(). Idempotent via ld.flag.
    void allocateLayer(int lid, const LayersShapesMap& layersShapes)
    {
        CV_TRACE_FUNCTION();

        LayerData &ld = layers[lid];

        //already allocated
        if (ld.flag)
            return;

        size_t ninputs = ld.inputBlobsId.size();
#if 0
        // Debug dump of the layer's inputs (disabled).
        printf("layer %s:", ld.name.c_str());
        for (size_t i = 0; i < ninputs; i++)
        {
            int inp_lid = ld.inputBlobsId[i].lid;
            LayerData &inp_ld = layers[inp_lid];
            int inp_outputs = (int)inp_ld.outputBlobs.size();
            std::cout << " " << inp_ld.name << "(" << inp_outputs;

            for( int j = 0; j < inp_outputs; j++ )
            {
                std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
            }
            std::cout << ")";
        }
        printf("\n");
#endif

        //determine parent layers
        for (size_t i = 0; i < ninputs; i++)
            ld.inputLayersId.insert(ld.inputBlobsId[i].lid);

        //allocate parents (recursion terminates because ld.flag is set below)
        for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
            allocateLayer(*i, layersShapes);

        //bind inputs
        if (ld.id == 0) // DataLayer
        {
            // The data layer's "inputs" are the user-provided network inputs.
            ninputs = netInputLayer->inputsData.size();
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
            }
        }
        else
        {
            // Point each input at the producer layer's output blob/wrapper.
            ld.inputBlobs.resize(ninputs);
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                LayerPin from = ld.inputBlobsId[i];
                CV_Assert(from.valid());
                CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
                ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
                ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
            }
        }

        LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);

        CV_Assert(layerShapesIt != layersShapes.end());

        // Let the blob manager allocate (and possibly reuse) output/internal
        // blobs; the last argument requests FP16 storage for the OpenCL FP16 target.
        std::vector<LayerPin> pinsForInternalBlobs;
        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
                                          preferableBackend == DNN_BACKEND_OPENCV &&
                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
        for (int i = 0; i < ld.outputBlobs.size(); ++i)
        {
            ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
        }
        ld.internalBlobsWrappers.resize(ld.internals.size());
        for (int i = 0; i < ld.internals.size(); ++i)
        {
            ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
        }

        Ptr<Layer> layerPtr = ld.getLayerInstance();
        {
            // finalize() lets the layer adjust itself to the concrete input Mats.
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
            {
                inps[i] = *ld.inputBlobs[i];
            }
            layerPtr->finalize(inps, ld.outputBlobs);
            layerPtr->preferableTarget = preferableTarget;
#if 0
            // Debug dump of the layer's outputs (disabled).
            std::cout << "\toutputs:";
            size_t noutputs = ld.outputBlobs.size();
            for (size_t j = 0; j < noutputs; j++)
            {
                std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
            }
            std::cout << "\n";
#endif
        }

        // After allocation of layer, we decrease counters to it's input blobs.
        blobManager.releaseReferences(ld.inputBlobsId);
        blobManager.releaseReferences(pinsForInternalBlobs);

        ld.flag = 1;
    }
1746
1747
#if 0
1748
#define printf_(args) printf args
1749
#else
1750
#define printf_(args)
1751
#endif
1752
1753
void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
1754
{
1755
if( !fusion || preferableBackend != DNN_BACKEND_OPENCV &&
1756
preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
1757
return;
1758
1759
CV_TRACE_FUNCTION();
1760
1761
// scan through all the layers. If there is convolution layer followed by the activation layer,
1762
// we try to embed this activation into the convolution and disable separate execution of the activation
1763
std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
1764
blobsToKeep_.end());
1765
MapIdToLayerData::iterator it;
1766
for (it = layers.begin(); it != layers.end(); it++)
1767
{
1768
int lid = it->first;
1769
LayerData& ld = layers[lid];
1770
if( ld.skip )
1771
{
1772
printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1773
continue;
1774
}
1775
printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1776
1777
// the optimization #1. try to fuse batch norm, scaling and/or activation layers
1778
// with the current layer if they follow it. Normally, the are fused with the convolution layer,
1779
// but some of them (like activation) may be fused with fully-connected, elemwise (+) and
1780
// some other layers.
1781
Ptr<Layer>& currLayer = ld.layerInstance;
1782
if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
1783
{
1784
LayerData* nextData = &layers[ld.consumers[0].lid];
1785
LayerPin lpNext(ld.consumers[0].lid, 0);
1786
while (nextData)
1787
{
1788
Ptr<Layer> nextLayer = nextData->layerInstance;
1789
if (currLayer->tryFuse(nextLayer))
1790
{
1791
printf_(("\tfused with %s\n", nextLayer->name.c_str()));
1792
nextData->skip = true;
1793
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1794
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1795
if (nextData->consumers.size() == 1)
1796
{
1797
int nextLayerId = nextData->consumers[0].lid;
1798
nextData = &layers[nextLayerId];
1799
lpNext = LayerPin(nextLayerId, 0);
1800
}
1801
else
1802
{
1803
nextData = 0;
1804
break;
1805
}
1806
}
1807
else
1808
break;
1809
}
1810
1811
if (preferableBackend != DNN_BACKEND_OPENCV)
1812
continue; // Go to the next layer.
1813
1814
// TODO: OpenCL target support more fusion styles.
1815
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
1816
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
1817
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
1818
ld.layerInstance->type != "Concat")) )
1819
continue;
1820
1821
while (nextData)
1822
{
1823
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
1824
if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
1825
nextData->type != "ReLU" &&
1826
nextData->type != "ChannelsPReLU" &&
1827
nextData->type != "ReLU6" &&
1828
nextData->type != "TanH" &&
1829
nextData->type != "Power")
1830
break;
1831
1832
Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1833
if (nextActivLayer.empty())
1834
break;
1835
1836
if (currLayer->setActivation(nextActivLayer))
1837
{
1838
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1839
nextData->skip = true;
1840
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1841
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1842
if (nextData->consumers.size() == 1)
1843
{
1844
int nextLayerId = nextData->consumers[0].lid;
1845
nextData = &layers[nextLayerId];
1846
lpNext = LayerPin(nextLayerId, 0);
1847
}
1848
else
1849
{
1850
nextData = 0;
1851
break;
1852
}
1853
}
1854
else
1855
break;
1856
}
1857
1858
// fuse convolution layer followed by eltwise + relu
1859
if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
1860
{
1861
Ptr<EltwiseLayer> nextEltwiseLayer;
1862
if( nextData )
1863
nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
1864
1865
if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
1866
{
1867
LayerData *eltwiseData = nextData;
1868
// go down from the second input and find the first non-skipped layer.
1869
LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
1870
CV_Assert(downLayerData);
1871
while (downLayerData->skip)
1872
{
1873
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
1874
}
1875
CV_Assert(downLayerData);
1876
1877
// second input layer is current layer.
1878
if ( ld.id == downLayerData->id )
1879
{
1880
// go down from the first input and find the first non-skipped layer
1881
downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
1882
while (downLayerData->skip)
1883
{
1884
if ( !downLayerData->type.compare("Eltwise") )
1885
downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
1886
else
1887
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
1888
}
1889
1890
Ptr<ConvolutionLayer> convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
1891
1892
// first input layer is convolution layer
1893
if( !convLayer.empty() && eltwiseData->consumers.size() == 1 )
1894
{
1895
// fuse eltwise + activation layer
1896
LayerData *firstConvLayerData = downLayerData;
1897
{
1898
nextData = &layers[eltwiseData->consumers[0].lid];
1899
lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
1900
Ptr<ActivationLayer> nextActivLayer;
1901
if( nextData )
1902
nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1903
1904
if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
1905
(!nextData->type.compare("ReLU") ||
1906
!nextData->type.compare("ChannelsPReLU") ||
1907
!nextData->type.compare("Power")) &&
1908
currLayer->setActivation(nextActivLayer) )
1909
{
1910
CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
1911
ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
1912
printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
1913
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1914
eltwiseData->skip = true;
1915
nextData->skip = true;
1916
// This optimization for cases like
1917
// some_layer conv
1918
// | |
1919
// +-- eltwise --+
1920
// |
1921
// activ
1922
// This way all the element-wise computations
1923
// (i.e. some_layer+conv or some_layer*conv)
1924
// would be done at [conv] layer. So we need to
1925
// replace [conv]'s output blob to [eltwise]'s one
1926
// considering that [activ] is an in-place layer.
1927
// Also we need to move all the consumers' references.
1928
// To prevent memory collisions (i.e. when input of
1929
// [conv] and output of [eltwise] is the same blob)
1930
// we allocate a new blob.
1931
CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
1932
ld.outputBlobs[0] = ld.outputBlobs[0].clone();
1933
ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
1934
1935
eltwiseData->outputBlobs = ld.outputBlobs;
1936
nextData->outputBlobs = ld.outputBlobs;
1937
eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
1938
nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
1939
1940
// Move references of [activ] layer consumers to the newly allocated blob.
1941
for (int i = 0; i < nextData->consumers.size(); ++i)
1942
{
1943
LayerData& consumer = layers[nextData->consumers[i].lid];
1944
for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
1945
{
1946
if (consumer.inputBlobsId[j].lid == lpNext.lid)
1947
{
1948
consumer.inputBlobs[j] = &ld.outputBlobs[0];
1949
consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
1950
break;
1951
}
1952
}
1953
}
1954
}
1955
}
1956
}
1957
}
1958
}
1959
}
1960
}
1961
1962
if (preferableBackend != DNN_BACKEND_OPENCV)
1963
continue; // Go to the next layer.
1964
1965
// the optimization #2. if there is no layer that takes max pooling layer's computed
1966
// max indices (and only some semantical segmentation networks might need this;
1967
// many others only take the maximum values), then we switch the max pooling
1968
// layer to the faster operating mode.
1969
Ptr<PoolingLayer> poolingLayer = ld.layerInstance.dynamicCast<PoolingLayer>();
1970
if( !poolingLayer.empty() && !ld.consumers.empty() )
1971
{
1972
size_t i = 0, nconsumers = ld.consumers.size();
1973
for( ; i < nconsumers; i++ )
1974
if( ld.consumers[i].oid > 0 )
1975
break;
1976
// if there is no layer that takes the second output pin of the pooling layer
1977
// on input then we don't need to compute the indices
1978
if( i >= nconsumers )
1979
{
1980
poolingLayer->computeMaxIdx = false;
1981
printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str()));
1982
}
1983
}
1984
1985
// the optimization #3. if there is concat layer that concatenates channels
1986
// from the inputs together (i.e. axis == 1) then we make the inputs of
1987
// the concat layer to write to the concatenation output buffer
1988
// (and so we eliminate the concatenation layer, because the channels
1989
// are concatenated implicitly).
1990
Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
1991
if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
1992
ld.outputBlobs.size() == 1 )
1993
{
1994
Mat& output = ld.outputBlobs[0];
1995
UMat umat_output;
1996
if (!ld.outputBlobsWrappers.empty() &&
1997
(preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
1998
{
1999
size_t i, ninputs = ld.inputBlobsId.size();
2000
bool conv_layer = true;
2001
for( i = 0; i < ninputs; i++ )
2002
{
2003
LayerPin pin = ld.inputBlobsId[i];
2004
LayerData* inp_i_data = &layers[pin.lid];
2005
while(inp_i_data->skip &&
2006
inp_i_data->inputBlobsId.size() == 1 &&
2007
inp_i_data->consumers.size() == 1)
2008
{
2009
pin = inp_i_data->inputBlobsId[0];
2010
inp_i_data = &layers[pin.lid];
2011
}
2012
conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2013
}
2014
if (!conv_layer)
2015
continue;
2016
std::vector<UMat> umat_outputBlobs;
2017
umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2018
umat_output = umat_outputBlobs[0];
2019
}
2020
2021
// TODO: in general, this optimization can always be done, but
2022
// many layers currently check that the input/output blobs are
2023
// continuous arrays. Unfortunately, this is not true when
2024
// the concatenation optimization is applied with batch_size > 1.
2025
// so, for now, we only apply this optimization in the most popular
2026
// case batch_size == 1.
2027
if( output.dims == 4 && output.size[0] == 1 )
2028
{
2029
size_t i, ninputs = ld.inputBlobsId.size();
2030
std::vector<LayerPin> realinputs(ninputs);
2031
for( i = 0; i < ninputs; i++ )
2032
{
2033
LayerPin pin = ld.inputBlobsId[i];
2034
LayerData* inp_i_data = &layers[pin.lid];
2035
while(inp_i_data->skip &&
2036
inp_i_data->inputBlobsId.size() == 1 &&
2037
inp_i_data->consumers.size() == 1)
2038
{
2039
pin = inp_i_data->inputBlobsId[0];
2040
inp_i_data = &layers[pin.lid];
2041
}
2042
printf_(("\treal input for %s is %s\n",
2043
layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2044
inp_i_data->getLayerInstance()->name.c_str()));
2045
2046
if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2047
break;
2048
realinputs[i] = pin;
2049
}
2050
2051
if( i >= ninputs )
2052
{
2053
// Allocate new memory to prevent collisions during memory
2054
// reusing (see https://github.com/opencv/opencv/pull/10456).
2055
output = output.clone();
2056
if (preferableBackend == DNN_BACKEND_OPENCV &&
2057
IS_DNN_OPENCL_TARGET(preferableTarget))
2058
{
2059
std::vector<UMat> umats(1);
2060
umat_output = umat_output.clone();
2061
umats[0] = umat_output;
2062
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
2063
}
2064
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
2065
int ofs = 0;
2066
for( i = 0; i < ninputs; i++ )
2067
{
2068
LayerPin pin = realinputs[i];
2069
LayerData* inp_i_data = &layers[pin.lid];
2070
int channels_i = ld.inputBlobs[i]->size[1];
2071
chrange[1] = Range(ofs, ofs + channels_i);
2072
printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2073
pin.oid, ofs, ofs + channels_i));
2074
ofs += channels_i;
2075
Mat output_slice = output(chrange);
2076
Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2077
CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2078
Mat* oldPtr = &curr_output;
2079
curr_output = output_slice;
2080
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2081
{
2082
std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2083
umats[pin.oid] = umat_output(chrange);
2084
OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
2085
}
2086
// Layers that refer old input Mat will refer to the
2087
// new data but the same Mat object.
2088
CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2089
}
2090
ld.skip = true;
2091
printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
2092
}
2093
}
2094
}
2095
}
2096
}
2097
2098
// Allocates memory and backend resources for every layer and then runs the
// layer-fusion pass. blobsToKeep_ lists blobs that must survive the whole
// forward pass (user-requested outputs), so the blob manager must not reuse
// their storage for other layers.
void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();

    // Reset the per-layer "allocated" flag before the new allocation pass.
    MapIdToLayerData::iterator it;
    for (it = layers.begin(); it != layers.end(); it++)
        it->second.flag = 0;

    CV_Assert(!layers[0].outputBlobs.empty());
    ShapesVec inputShapes;
    for(int i = 0; i < layers[0].outputBlobs.size(); i++)
    {
        Mat& inp = layers[0].outputBlobs[i];
        CV_Assert(inp.total());
        if (preferableBackend == DNN_BACKEND_OPENCV &&
            preferableTarget == DNN_TARGET_OPENCL_FP16)
        {
            // CV_16S is used as the storage type for FP16 data on OpenCL.
            layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
        }
        inputShapes.push_back(shape(inp));
    }
    // Infer the in/out/internal shapes of all layers from the input shapes.
    LayersShapesMap layersShapes;
    getLayersShapes(inputShapes, layersShapes);

    blobManager.reset();
    backendWrappers.clear();
    // Fake references to input blobs.
    for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
        blobManager.addReference(LayerPin(0, i));
    // Reference-count every blob by the consumers that read it.
    for (it = layers.begin(); it != layers.end(); ++it)
    {
        const LayerData& ld = it->second;
        blobManager.addReferences(ld.inputBlobsId);
    }

    // Extra references pin the requested output blobs until the very end.
    for (int i = 0; i < blobsToKeep_.size(); i++)
    {
        blobManager.addReference(blobsToKeep_[i]);
    }

    // Allocate layers in ascending id (topological) order.
    for (it = layers.begin(); it != layers.end(); it++)
    {
        int lid = it->first;
        allocateLayer(lid, layersShapes);
    }

    layersTimings.resize(lastLayerId + 1, 0);
    fuseLayers(blobsToKeep_);
}
2147
2148
// Runs inference for a single layer, dispatching to the preferable backend,
// and records its execution time in layersTimings. Skipped (fused) layers
// only get a zeroed timing entry.
void forwardLayer(LayerData &ld)
{
    CV_TRACE_FUNCTION();

    Ptr<Layer> layer = ld.layerInstance;

    TickMeter tm;
    tm.start();

    if( !ld.skip )
    {
        std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
        // Fall back to the OpenCV implementation when no backend node exists.
        if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
        {
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
            {
                // OpenCL path: run the layer on UMats taken from the wrappers.
                std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
                std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
                layer->forward(umat_inputBlobs,
                               umat_outputBlobs,
                               umat_internalBlobs);
                // Optional debug pass: scan outputs for NaN/Inf and dump all
                // blobs when a problem is found.
                if (DNN_CHECK_NAN_INF)
                {
                    bool fail = false;
                    for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                    {
                        UMat& u = umat_outputBlobs[i];
                        Mat m;
                        if (u.depth() == CV_16S) // FP16
                            convertFp16(u, m);
                        else
                            m = u.getMat(ACCESS_READ);
                        if (!checkRange(m))
                        {
                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
                        {
                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                    }
                    if (fail)
                    {
                        // Dump inputs, outputs and internals to aid debugging.
                        for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
                        {
                            UMat& u = umat_inputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                        {
                            UMat& u = umat_outputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
                        {
                            UMat& u = umat_internalBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
                        }
                        if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                            CV_Assert(!fail);
                    }
                }
                // Publish the (possibly reallocated) UMats back to the wrappers.
                OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
            }
            else
            {
                // CPU path: make sure device-side inputs are synced to host.
                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
                {
                    if (!ld.inputBlobsWrappers[i].empty())
                        ld.inputBlobsWrappers[i]->copyToHost();
                }

                std::vector<Mat> inps(ld.inputBlobs.size());
                for (int i = 0; i < ld.inputBlobs.size(); ++i)
                {
                    inps[i] = *ld.inputBlobs[i];
                }
                layer->forward(inps, ld.outputBlobs, ld.internals);

                // Same optional NaN/Inf diagnostics for the Mat path.
                if (DNN_CHECK_NAN_INF)
                {
                    bool fail = false;
                    for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                    {
                        const Mat& m = ld.outputBlobs[i];
                        if (!checkRange(m))
                        {
                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
                        {
                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                    }
                    if (fail)
                    {
                        for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
                        {
                            const Mat* pM = ld.inputBlobs[i];
                            if (!pM)
                            {
                                std::cout << "INPUT " << i << " is NULL" << std::endl;
                                continue;
                            }
                            const Mat& m = *pM;
                            std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                        {
                            const Mat& m = ld.outputBlobs[i];
                            std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < ld.internals.size(); ++i)
                        {
                            const Mat& m = ld.internals[i];
                            std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                            CV_Assert(!fail);
                    }
                }

                // Host data changed; mark device-side copies as stale.
                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
                {
                    if (!ld.outputBlobsWrappers[i].empty())
                        ld.outputBlobsWrappers[i]->setHostDirty();
                }
            }
        }
        else
        {
            // A dedicated backend node exists for this layer: dispatch to it.
            Ptr<BackendNode> node = it->second;
            CV_Assert(!node.empty());
            if (preferableBackend == DNN_BACKEND_HALIDE)
            {
                forwardHalide(ld.outputBlobsWrappers, node);
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
            {
                forwardInfEngine(node);
            }
            else if (preferableBackend == DNN_BACKEND_VKCOM)
            {
                forwardVkCom(ld.outputBlobsWrappers, node);
            }
            else
            {
                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
            }
        }
    }
    else
        tm.reset();  // fused/skipped layer: report zero time

    tm.stop();
    layersTimings[ld.id] = tm.getTimeTicks();

    ld.flag = 1;  // mark as forwarded
}
2337
2338
// Forwards all layers with id < ld.id (in id order) and then ld itself.
// When clearFlags is set, the per-layer "forwarded" flags are reset first so
// every required layer runs again.
void forwardToLayer(LayerData &ld, bool clearFlags = true)
{
    CV_TRACE_FUNCTION();

    if (clearFlags)
    {
        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
            it->second.flag = 0;
    }

    // Already was forwarded.
    if (ld.flag)
        return;

    // Forward parents. NOTE: the loop variable is named 'parentData' to fix
    // the previous shadowing of the 'ld' parameter inside the loop body.
    MapIdToLayerData::iterator it;
    for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
    {
        LayerData &parentData = it->second;
        if (parentData.flag)
            continue;
        forwardLayer(parentData);
    }

    // Forward itself.
    forwardLayer(ld);
}
2366
2367
void forwardAll()
2368
{
2369
CV_TRACE_FUNCTION();
2370
2371
MapIdToLayerData::reverse_iterator last_layer = layers.rbegin();
2372
CV_Assert(last_layer != layers.rend());
2373
forwardToLayer(last_layer->second, true);
2374
}
2375
2376
void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
2377
{
2378
std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
2379
2380
if (inOutShapes[id].in.empty())
2381
{
2382
for(int i = 0; i < inputLayerIds.size(); i++)
2383
{
2384
int layerId = inputLayerIds[i].lid;
2385
LayersShapesMap::iterator it =
2386
inOutShapes.find(layerId);
2387
if(it == inOutShapes.end() ||
2388
it->second.out.empty())
2389
{
2390
getLayerShapesRecursively(layerId, inOutShapes);
2391
}
2392
const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
2393
inOutShapes[id].in.push_back(shape);
2394
}
2395
}
2396
const ShapesVec& is = inOutShapes[id].in;
2397
ShapesVec& os = inOutShapes[id].out;
2398
ShapesVec& ints = inOutShapes[id].internal;
2399
int requiredOutputs = layers[id].requiredOutputs.size();
2400
inOutShapes[id].supportInPlace =
2401
layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints);
2402
}
2403
2404
void getLayersShapes(const ShapesVec& netInputShapes,
2405
LayersShapesMap& inOutShapes)
2406
{
2407
inOutShapes.clear();
2408
2409
inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2410
for (MapIdToLayerData::iterator it = layers.begin();
2411
it != layers.end(); it++)
2412
{
2413
getLayerShapesRecursively(it->first, inOutShapes);
2414
}
2415
}
2416
2417
void getLayerShapes(const ShapesVec& netInputShapes,
2418
const int layerId,
2419
LayerShapes& shapes)
2420
{
2421
LayersShapesMap inOutShapes;
2422
inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2423
getLayerShapesRecursively(layerId, inOutShapes);
2424
shapes = inOutShapes[layerId];
2425
}
2426
2427
// Returns the pin of the deepest (largest id) layer among `pins`.
// Forwarding to that layer guarantees all the others have run too.
LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
{
    // std::max_element on an empty range returns end(); dereferencing it
    // would be undefined behavior, so fail loudly instead.
    CV_Assert(!pins.empty());
    return *std::max_element(pins.begin(), pins.end());
}
2431
2432
// Fetches the output blob addressed by `pin`, downloading it from the device
// and converting FP16 data to FP32 when necessary.
Mat getBlob(const LayerPin& pin)
{
    CV_TRACE_FUNCTION();

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob not found");

    LayerData& ld = layers[pin.lid];
    if ((size_t)pin.oid >= ld.outputBlobs.size())
    {
        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, "
                                       "the #%d was requested", ld.name.c_str(),
                                       ld.outputBlobs.size(), pin.oid));
    }
    if (preferableTarget != DNN_TARGET_CPU)
    {
        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
        // Bring the data back to the host before reading it.
        ld.outputBlobsWrappers[pin.oid]->copyToHost();
    }

    Mat& blob = ld.outputBlobs[pin.oid];
    if (blob.depth() != CV_16S)
        return blob;

    // FP16 blobs are converted into the persistent FP32 scratch buffer.
    convertFp16(blob, output_blob);
    return output_blob;
}
2461
2462
// Name-based overload: resolve the alias to a concrete pin, then fetch it.
Mat getBlob(String outputName)
{
    const LayerPin pin = getPinByAlias(outputName);
    return getBlob(pin);
}
2466
};
2467
2468
// Constructs an empty network; all state lives in the pimpl object.
Net::Net() : impl(new Net::Impl)
{
}
2471
2472
// Loads an Intel Model Optimizer IR pair (.xml topology + .bin weights) and
// wraps it as a cv::dnn::Net pinned to the Inference Engine backend.
Net Net::readFromModelOptimizer(const String& xml, const String& bin)
{
#ifndef HAVE_INF_ENGINE
    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
#else
    // Parse the IR files.
    InferenceEngine::CNNNetReader reader;
    reader.ReadNetwork(xml);
    reader.ReadWeights(bin);

    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();

    std::vector<String> inputsNames;
    for (auto& it : ieNet.getInputsInfo())
    {
        inputsNames.push_back(it.first);
    }

    Net cvNet;
    cvNet.setInputsNames(inputsNames);

    // One shared backend node wraps the whole IE network; every wrapper layer
    // created below points at it.
    Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(0));
    backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
    // Create a cv::dnn wrapper layer for each IE network output.
    for (auto& it : ieNet.getOutputsInfo())
    {
        Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
        CV_Assert(ieLayer);

        LayerParams lp;
        int lid = cvNet.addLayer(it.first, "", lp);

        LayerData& ld = cvNet.impl->layers[lid];
        cvLayer->name = it.first;
        cvLayer->type = ieLayer->type;
        ld.layerInstance = cvLayer;
        ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;

        // Connect every net input to this output layer so graph traversal works.
        for (int i = 0; i < inputsNames.size(); ++i)
            cvNet.connect(0, i, lid, i);
    }
    cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);

    // The IE network is already built; skip the usual IE initialization pass.
    cvNet.impl->skipInfEngineInit = true;
    return cvNet;
#endif  // HAVE_INF_ENGINE
}
2518
2519
// Default destruction: impl is a smart pointer, so Impl is released here.
Net::~Net()
{
}
2522
2523
int Net::addLayer(const String &name, const String &type, LayerParams &params)
2524
{
2525
CV_TRACE_FUNCTION();
2526
2527
if (impl->getLayerId(name) >= 0)
2528
{
2529
CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
2530
return -1;
2531
}
2532
2533
int id = ++impl->lastLayerId;
2534
impl->layerNameToId.insert(std::make_pair(name, id));
2535
impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
2536
2537
return id;
2538
}
2539
2540
int Net::addLayerToPrev(const String &name, const String &type, LayerParams &params)
2541
{
2542
CV_TRACE_FUNCTION();
2543
2544
int prvLid = impl->lastLayerId;
2545
int newLid = this->addLayer(name, type, params);
2546
this->connect(prvLid, 0, newLid, 0);
2547
return newLid;
2548
}
2549
2550
// Wires output #outNum of layer outLayerId into input #inpNum of inpLayerId.
void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
{
    CV_TRACE_FUNCTION();

    impl->connect(outLayerId, outNum, inpLayerId, inpNum);
}
2556
2557
// Name-based connect: pins are given as "layer.output" / "layer.input"
// aliases and resolved before wiring.
void Net::connect(String _outPin, String _inPin)
{
    CV_TRACE_FUNCTION();

    const LayerPin src = impl->getPinByAlias(_outPin);
    const LayerPin dst = impl->getPinByAlias(_inPin);

    CV_Assert(src.valid() && dst.valid());

    impl->connect(src.lid, src.oid, dst.lid, dst.oid);
}
2568
2569
// Runs the net up to `outputName` (the last layer when empty) and returns
// that layer's first output blob.
Mat Net::forward(const String& outputName)
{
    CV_TRACE_FUNCTION();

    String layerName = outputName;
    if (layerName.empty())
        layerName = getLayerNames().back();  // default: deepest layer

    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    return impl->getBlob(layerName);
}
2584
2585
// Runs the net up to `outputName` (last layer when empty) and copies that
// layer's outputs into `outputBlobs`, which may be a Mat, UMat, vector<Mat>
// or vector<UMat>. FP16 data is converted to FP32 for Mat outputs.
void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
{
    CV_TRACE_FUNCTION();

    String layerName = outputName;

    if (layerName.empty())
        layerName = getLayerNames().back();

    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    LayerPin pin = impl->getPinByAlias(layerName);
    LayerData &ld = impl->layers[pin.lid];

    if (outputBlobs.isUMat())
    {
        // Single UMat destination: fetch (host) blob and copy it over.
        impl->getBlob(layerName).copyTo(outputBlobs);
    }
    else if (outputBlobs.isMat())
    {
        outputBlobs.assign(impl->getBlob(layerName));
    }
    else if (outputBlobs.isMatVector())
    {
        // All outputs of the layer are returned; sync them to host first.
        if (impl->preferableTarget != DNN_TARGET_CPU)
        {
            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
            {
                CV_Assert(!ld.outputBlobsWrappers[i].empty());
                ld.outputBlobsWrappers[i]->copyToHost();
            }
        }
        if (ld.outputBlobs[0].depth() == CV_32F)
        {
            // FP32 blobs can be shared directly.
            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
            outputvec = ld.outputBlobs;
        } else {
            // FP16 (stored as CV_16S) must be converted to FP32 copies.
            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); i++)
                convertFp16(ld.outputBlobs[i], outputvec[i]);
        }
    }
    else if (outputBlobs.isUMatVector())
    {
        std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();

        if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
            IS_DNN_OPENCL_TARGET(impl->preferableTarget))
        {
            // Device-side data can be handed out without a host round-trip.
            if (impl->preferableTarget == DNN_TARGET_OPENCL)
                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
            else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                outputvec.resize(out_vec.size());
                for (int i = 0; i < out_vec.size(); i++)
                    convertFp16(out_vec[i], outputvec[i]);
            }
        }
        else
        {
            // Non-OpenCL data: upload host Mats into the UMat destinations.
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); ++i)
                ld.outputBlobs[i].copyTo(outputvec[i]);
        }
    }
}
2655
2656
void Net::forward(OutputArrayOfArrays outputBlobs,
2657
const std::vector<String>& outBlobNames)
2658
{
2659
CV_TRACE_FUNCTION();
2660
2661
std::vector<LayerPin> pins;
2662
for (int i = 0; i < outBlobNames.size(); i++)
2663
{
2664
pins.push_back(impl->getPinByAlias(outBlobNames[i]));
2665
}
2666
2667
impl->setUpNet(pins);
2668
2669
LayerPin out = impl->getLatestLayerPin(pins);
2670
2671
impl->forwardToLayer(impl->getLayerData(out.lid));
2672
2673
std::vector<Mat> matvec;
2674
for (int i = 0; i < pins.size(); i++)
2675
{
2676
matvec.push_back(impl->getBlob(pins[i]));
2677
}
2678
2679
std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2680
outputvec = matvec;
2681
}
2682
2683
void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
2684
const std::vector<String>& outBlobNames)
2685
{
2686
CV_TRACE_FUNCTION();
2687
2688
std::vector<LayerPin> pins;
2689
for (int i = 0; i < outBlobNames.size(); i++)
2690
{
2691
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
2692
pins.insert(pins.end(), lp.begin(), lp.end());
2693
}
2694
2695
impl->setUpNet(pins);
2696
2697
LayerPin out = impl->getLatestLayerPin(pins);
2698
2699
impl->forwardToLayer(impl->getLayerData(out.lid));
2700
2701
outputBlobs.resize(outBlobNames.size());
2702
for (int i = 0; i < outBlobNames.size(); i++)
2703
{
2704
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
2705
for (int i = 0; i < lp.size(); i++)
2706
{
2707
outputBlobs[i].push_back(impl->getBlob(lp[i]));
2708
}
2709
}
2710
}
2711
2712
void Net::setPreferableBackend(int backendId)
2713
{
2714
CV_TRACE_FUNCTION();
2715
CV_TRACE_ARG(backendId);
2716
2717
if( impl->preferableBackend != backendId )
2718
{
2719
impl->preferableBackend = backendId;
2720
impl->netWasAllocated = false;
2721
impl->clear();
2722
}
2723
}
2724
2725
// Selects the target device. OpenCL targets are silently downgraded when the
// build or the device cannot support them; any change invalidates the
// current allocation.
void Net::setPreferableTarget(int targetId)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG(targetId);

    if( impl->preferableTarget != targetId )
    {
        impl->preferableTarget = targetId;
        if (IS_DNN_OPENCL_TARGET(targetId))
        {
#ifndef HAVE_OPENCL
    // No OpenCL in this build: fall back to CPU for backends that would
    // otherwise run the OpenCV OpenCL path.
#ifdef HAVE_INF_ENGINE
            if (impl->preferableBackend == DNN_BACKEND_OPENCV)
#else
            if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
                impl->preferableBackend == DNN_BACKEND_OPENCV)
#endif  // HAVE_INF_ENGINE
                impl->preferableTarget = DNN_TARGET_CPU;
#else
            // Downgrade FP16 to plain OpenCL when the device lacks cl_khr_fp16.
            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
                impl->preferableTarget = DNN_TARGET_OPENCL;
#endif
        }
        impl->netWasAllocated = false;
        impl->clear();
    }
}
2753
2754
// Assigns names to the network inputs; names are stored on the dedicated
// input layer (id 0).
void Net::setInputsNames(const std::vector<String> &inputBlobNames)
{
    CV_TRACE_FUNCTION();

    impl->netInputLayer->setNames(inputBlobNames);
}
2760
2761
// Binds `blob` to the named network input (layer 0) together with its
// per-input preprocessing parameters (scale factor and mean subtraction).
// A shape change invalidates the current allocation.
void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    // Resolve the name to an output slot of the input layer.
    LayerPin pin;
    pin.lid = 0;
    pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");

    // Grow all per-input containers so pin.oid is addressable.
    LayerData &ld = impl->layers[pin.lid];
    const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
    ld.outputBlobs.resize(numInputs);
    ld.outputBlobsWrappers.resize(numInputs);
    impl->netInputLayer->inputsData.resize(numInputs);
    impl->netInputLayer->scaleFactors.resize(numInputs);
    impl->netInputLayer->means.resize(numInputs);

    MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
    Mat blob_ = blob.getMat();
    bool oldShape = prevShape == shape(blob_);
    if (oldShape)
    {
        // Same shape: reuse the existing storage, just copy the new data in.
        blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
    }
    else
    {
        // New shape: replace the blob; inputsData aliases the output blob.
        ld.outputBlobs[pin.oid] = blob_.clone();
        impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
    }

    // Host data changed; mark any device-side copy as stale.
    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
    }
    impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
    impl->netInputLayer->means[pin.oid] = mean;
    // A shape change requires re-allocation before the next forward().
    impl->netWasAllocated = impl->netWasAllocated && oldShape;
}
2802
2803
// Returns weight blob #numParam of the given layer.
// Raises via CV_Assert when numParam is out of range.
Mat Net::getParam(LayerId layer, int numParam)
{
    LayerData &ld = impl->getLayerData(layer);
    std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
    // Also reject negative indices: the original check only bounded above,
    // so a negative numParam passed the assert and indexed out of bounds.
    CV_Assert(numParam >= 0 && numParam < (int)layerBlobs.size());
    return layerBlobs[numParam];
}
2810
2811
// Replaces weight blob #numParam of the given layer with `blob`.
// No shape/type validation is performed — use carefully.
void Net::setParam(LayerId layer, int numParam, const Mat &blob)
{
    LayerData &ld = impl->getLayerData(layer);

    std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
    // Also reject negative indices (the original check only bounded above).
    CV_Assert(numParam >= 0 && numParam < (int)layerBlobs.size());
    //we don't make strong checks, use this function carefully
    layerBlobs[numParam] = blob;
}
2820
2821
// Resolves a layer name to its id (delegates to the impl; -1 semantics are
// those of Impl::getLayerId).
int Net::getLayerId(const String &layer)
{
    return impl->getLayerId(layer);
}
2825
2826
// Returns the layer instance for the given id/name.
// getLayerInstance() lazily constructs the layer if needed.
Ptr<Layer> Net::getLayer(LayerId layerId)
{
    return impl->getLayerData(layerId).getLayerInstance();
}
2831
2832
// Returns the instances of all layers that feed the given layer.
std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
{
    LayerData& ld = impl->getLayerData(layerId);
    if (!ld.layerInstance)
        CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));

    std::vector<Ptr<Layer> > inputLayers;
    inputLayers.reserve(ld.inputLayersId.size());
    for (std::set<int>::iterator it = ld.inputLayersId.begin();
         it != ld.inputLayersId.end(); ++it)
    {
        inputLayers.push_back(getLayer(*it));
    }
    return inputLayers;
}
2846
2847
std::vector<String> Net::getLayerNames() const
2848
{
2849
std::vector<String> res;
2850
res.reserve(impl->layers.size());
2851
2852
Impl::MapIdToLayerData::iterator it;
2853
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
2854
{
2855
if (it->second.id) //skip Data layer
2856
res.push_back(it->second.name);
2857
}
2858
2859
return res;
2860
}
2861
2862
bool Net::empty() const
2863
{
2864
return impl->layers.size() <= 1; //first layer is default Data layer
2865
}
2866
2867
std::vector<int> Net::getUnconnectedOutLayers() const
2868
{
2869
std::vector<int> layersIds;
2870
2871
Impl::MapIdToLayerData::iterator it;
2872
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
2873
{
2874
int lid = it->first;
2875
LayerData &ld = it->second;
2876
2877
if (ld.requiredOutputs.size() == 0)
2878
layersIds.push_back(lid);
2879
}
2880
2881
return layersIds;
2882
}
2883
2884
std::vector<String> Net::getUnconnectedOutLayersNames() const
2885
{
2886
std::vector<int> ids = getUnconnectedOutLayers();
2887
const size_t n = ids.size();
2888
std::vector<String> names(n);
2889
for (size_t i = 0; i < n; ++i)
2890
{
2891
names[i] = impl->layers[ids[i]].name;
2892
}
2893
return names;
2894
}
2895
2896
void Net::getLayersShapes(const ShapesVec& netInputShapes,
2897
std::vector<int>& layersIds,
2898
std::vector<ShapesVec>& inLayersShapes,
2899
std::vector<ShapesVec>& outLayersShapes) const
2900
{
2901
layersIds.clear();
2902
inLayersShapes.clear();
2903
outLayersShapes.clear();
2904
2905
Impl::LayersShapesMap inOutShapes;
2906
impl->getLayersShapes(netInputShapes, inOutShapes);
2907
2908
for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
2909
it != inOutShapes.end(); it++)
2910
{
2911
layersIds.push_back(it->first);
2912
inLayersShapes.push_back(it->second.in);
2913
outLayersShapes.push_back(it->second.out);
2914
}
2915
}
2916
2917
// Single-input convenience overload: wraps the shape in a one-element vector.
void Net::getLayersShapes(const MatShape& netInputShape,
                          std::vector<int>& layerIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    getLayersShapes(ShapesVec(1, netInputShape),
                    layerIds, inLayersShapes, outLayersShapes);
}
2925
2926
// Single-input convenience overload: wraps the shape in a one-element vector.
void Net::getLayerShapes(const MatShape& netInputShape,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    getLayerShapes(ShapesVec(1, netInputShape),
                   layerId, inLayerShapes, outLayerShapes);

}
2935
2936
// Computes the input/output shapes of a single layer given the network
// input shapes.
void Net::getLayerShapes(const ShapesVec& netInputShapes,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);
    inLayerShapes = shapes.in;
    outLayerShapes = shapes.out;
}
2946
2947
// Total FLOPs of the whole network for the given input shapes, summed over
// the per-layer estimates.
int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();

    std::vector<int> ids;
    std::vector<std::vector<MatShape> > inShapes, outShapes;
    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
    CV_Assert(inShapes.size() == outShapes.size());
    CV_Assert(inShapes.size() == ids.size());

    int64 flops = 0;
    for (size_t i = 0; i < ids.size(); i++)
        flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]);

    return flops;
}
2966
2967
// Single-input convenience overload.
int64 Net::getFLOPS(const MatShape& netInputShape) const
{
    return getFLOPS(std::vector<MatShape>(1, netInputShape));
}
2971
2972
// FLOPs estimate for a single layer, given the network input shapes.
int64 Net::getFLOPS(const int layerId,
                    const std::vector<MatShape>& netInputShapes) const
{
    Impl::MapIdToLayerData::iterator found = impl->layers.find(layerId);
    CV_Assert(found != impl->layers.end());

    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);

    return found->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
}
2983
2984
// Single-input convenience overload for the per-layer FLOPs estimate.
int64 Net::getFLOPS(const int layerId,
                    const MatShape& netInputShape) const
{
    return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
}
2989
2990
void Net::getLayerTypes(std::vector<String>& layersTypes) const
2991
{
2992
layersTypes.clear();
2993
2994
std::map<String, int> layers;
2995
for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
2996
it != impl->layers.end(); it++)
2997
{
2998
if (layers.find(it->second.type) == layers.end())
2999
layers[it->second.type] = 0;
3000
layers[it->second.type]++;
3001
}
3002
3003
for (std::map<String, int>::iterator it = layers.begin();
3004
it != layers.end(); it++)
3005
{
3006
layersTypes.push_back(it->first);
3007
}
3008
}
3009
3010
int Net::getLayersCount(const String& layerType) const
3011
{
3012
int count = 0;
3013
for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
3014
it != impl->layers.end(); it++)
3015
{
3016
if (it->second.type == layerType)
3017
count++;
3018
}
3019
return count;
3020
}
3021
3022
void Net::getMemoryConsumption(const int layerId,
3023
const std::vector<MatShape>& netInputShapes,
3024
size_t& weights, size_t& blobs) const
3025
{
3026
CV_TRACE_FUNCTION();
3027
3028
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
3029
CV_Assert(layer != impl->layers.end());
3030
3031
weights = blobs = 0;
3032
3033
for(int i = 0; i < layer->second.params.blobs.size(); i++)
3034
{
3035
const Mat& weightsBlob = layer->second.params.blobs[i];
3036
weights += weightsBlob.total()*weightsBlob.elemSize();
3037
}
3038
3039
ShapesVec inLayerShapes, outLayerShapes;
3040
getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
3041
for(int i = 0; i < outLayerShapes.size(); i++)
3042
{
3043
blobs += total(outLayerShapes[i]) * sizeof(float);
3044
}
3045
}
3046
3047
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3048
size_t& weights, size_t& blobs) const
3049
{
3050
CV_TRACE_FUNCTION();
3051
3052
std::vector<int> layerIds;
3053
std::vector<size_t> w, b;
3054
getMemoryConsumption(netInputShapes, layerIds, w, b);
3055
3056
weights = blobs = 0;
3057
for(int i = 0; i < layerIds.size(); i++)
3058
{
3059
weights += w[i];
3060
blobs += b[i];
3061
}
3062
}
3063
3064
// Single-input convenience overload for the per-layer memory estimate.
void Net::getMemoryConsumption(const int layerId,
                               const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}
3071
3072
void Net::getMemoryConsumption(const MatShape& netInputShape,
3073
size_t& weights, size_t& blobs) const
3074
{
3075
getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
3076
weights, blobs);
3077
}
3078
3079
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3080
std::vector<int>& layerIds, std::vector<size_t>& weights,
3081
std::vector<size_t>& blobs) const
3082
{
3083
CV_TRACE_FUNCTION();
3084
3085
layerIds.clear();
3086
weights.clear();
3087
blobs.clear();
3088
3089
std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
3090
3091
getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
3092
3093
for(int i = 0; i < layerIds.size(); i++)
3094
{
3095
int w = 0, b = 0;
3096
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
3097
CV_Assert(layer != impl->layers.end());
3098
3099
for(int j = 0; j < layer->second.params.blobs.size(); j++)
3100
{
3101
const Mat& weightsBlob = layer->second.params.blobs[j];
3102
w += weightsBlob.total()*weightsBlob.elemSize();
3103
}
3104
3105
for(int j = 0; j < outLayerShapes[i].size(); j++)
3106
{
3107
b += total(outLayerShapes[i][j]) * sizeof(float);
3108
}
3109
3110
weights.push_back(w);
3111
blobs.push_back(b);
3112
}
3113
}
3114
3115
void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
3116
std::vector<size_t>& weights, std::vector<size_t>& blobs) const
3117
{
3118
getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
3119
weights, blobs);
3120
}
3121
3122
void Net::enableFusion(bool fusion)
3123
{
3124
if( impl->fusion != fusion )
3125
{
3126
impl->fusion = fusion;
3127
impl->netWasAllocated = false;
3128
impl->clear();
3129
}
3130
}
3131
3132
// Stores the path of a Halide scheduling configuration file in the
// network implementation.
// NOTE(review): presumably consulted when the Halide backend compiles the
// net — the consuming code is outside this fragment; confirm at use site.
void Net::setHalideScheduler(const String& scheduler)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());

    impl->halideConfigFile = scheduler;
}
3139
3140
// Returns per-layer timings of the last forward pass in 'timings' and the
// total as int64. Entry 0 of impl->layersTimings is skipped (per-layer
// values start at index 1).
// NOTE(review): assumes layersTimings is non-empty — confirm the impl
// always keeps at least one slot.
int64 Net::getPerfProfile(std::vector<double>& timings)
{
    timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
    // BUGFIX: the accumulator init must be a double. With an int '0' every
    // partial sum is truncated to int before the next addition, corrupting
    // the total of the double-valued timings.
    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
    return total;
}
3146
3147
//////////////////////////////////////////////////////////////////////////
3148
3149
// Layers target the CPU until a backend explicitly retargets them.
Layer::Layer()
{
    preferableTarget = DNN_TARGET_CPU;
}

Layer::Layer(const LayerParams &params)
    : blobs(params.blobs), name(params.name), type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}
3156
3157
void Layer::setParamsFrom(const LayerParams &params)
3158
{
3159
blobs = params.blobs;
3160
name = params.name;
3161
type = params.type;
3162
}
3163
3164
int Layer::inputNameToIndex(String)
3165
{
3166
return -1;
3167
}
3168
3169
int Layer::outputNameToIndex(const String&)
3170
{
3171
return 0;
3172
}
3173
3174
bool Layer::supportBackend(int backendId)
3175
{
3176
return backendId == DNN_BACKEND_OPENCV;
3177
}
3178
3179
// Backend-node factory stubs. Layers that claim support for a backend in
// supportBackend() must override the matching init*(); the base class
// always reports "not implemented".

Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();  // unreachable after CV_Error
}

Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}
3199
3200
// Applies a generic built-in Halide schedule to the layer's top function,
// chosen from the output geometry and the target device.
// node     - Halide backend node holding the compiled funcs
// inputs   - layer inputs (unused by this default schedule)
// outputs  - outputs[0] defines the iteration domain (W, H, C, N)
// targetId - DNN_TARGET_CPU or DNN_TARGET_OPENCL; anything else errors
void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
                                 const std::vector<Mat> &outputs, int targetId) const
{
#ifdef HAVE_HALIDE
    CV_TRACE_FUNCTION();

    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
                xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    if (targetId == DNN_TARGET_CPU)
    {
        if (outW == 1 && outH == 1)
        {
            // Degenerate 1x1x1x1 output: nothing worth scheduling.
            if (outC + outN == 1)
                return;

            // 1x1 spatial output: vectorize along channels when there are
            // enough of them, otherwise just parallelize the fused domain.
            if (outC > 8)
                top.split(c, co, ci, 8)
                   .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
                   .parallel(tile)
                   .vectorize(ci, 8);
            else
                top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
                   .parallel(tile);
        }
        else
        {
            if (outH > 2)
            {
                // Parallelize over pairs of rows, unroll within a pair and
                // vectorize along x (width capped at 16 lanes).
                top.reorder(x, c, y)
                   .split(y, yo, yi, 2)
                   .fuse(yo, n, tile)
                   .parallel(tile)
                   .unroll(yi)
                   .vectorize(x, outW >= 16 ? 16 : outW);
            }
        }
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
        if (outW == 1 && outH == 1)
        {
            // 1x1 spatial output on GPU: blocks over the fused domain,
            // threads over a channel slice.
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
               .gpu_threads(ci);
        }
        else
        {
            // General GPU tiling over x/y/c.
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
            // Supported vectorization widths: 2, 3, 4, 8, 16
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
               .gpu_threads(xi, yi)
               .reorder(xi, yi, ci, xo, yo, co)
               .vectorize(ci);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");
#endif  // HAVE_HALIDE
}
3270
3271
Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
3272
{
3273
return Ptr<BackendNode>();
3274
}
3275
3276
bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
3277
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
3278
void Layer::getScaleShift(Mat& scale, Mat& shift) const
3279
{
3280
scale = Mat();
3281
shift = Mat();
3282
}
3283
3284
void Layer::unsetAttached()
3285
{
3286
setActivation(Ptr<ActivationLayer>());
3287
}
3288
3289
// Fills 'pv' with (non-const) pointers to the elements of 'v'.
// The pointers remain valid only while 'v' is alive and not reallocated.
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    const size_t count = v.size();
    pv.resize(count);
    for (size_t i = 0; i < count; i++)
        pv[i] = const_cast<T*>(&v[i]);
}
3296
3297
void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
3298
{
3299
CV_TRACE_FUNCTION();
3300
this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
3301
}
3302
3303
void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
3304
{
3305
CV_UNUSED(input);CV_UNUSED(output);
3306
}
3307
3308
// Extracts plain Mats from the InputArray wrappers and dispatches to the
// legacy pointer-based finalize.
void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    // The legacy interface wants pointers to the input Mats.
    std::vector<Mat*> inputPtrs;
    vecToPVec(inputs, inputPtrs);
    this->finalize(inputPtrs, outputs);
}
3319
3320
std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
3321
{
3322
CV_TRACE_FUNCTION();
3323
3324
std::vector<Mat> outputs;
3325
this->finalize(inputs, outputs);
3326
return outputs;
3327
}
3328
3329
void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
3330
{
3331
// We kept this method for compatibility. DNN calls it now only to support users' implementations.
3332
}
3333
3334
void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
3335
{
3336
CV_TRACE_FUNCTION();
3337
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3338
3339
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
3340
}
3341
3342
// Generic forward implementation: unwraps the InputArray containers and
// dispatches to the legacy forward(std::vector<Mat*>&, ...) interface.
// On the OpenCL FP16 target, inputs arrive as CV_16S-encoded half floats;
// this path converts them to FP32, runs the layer, and converts back.
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        // Caller-visible FP16 buffers.
        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        // FP16 -> FP32 copies of the inputs.
        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        // Fresh FP32 buffers with matching shapes for outputs/internals.
        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        // Runs the layer in FP32 via the InputArray-based forward().
        forward(inputs, outputs, internals);

        // FP32 -> FP16 back into the caller's output buffers.
        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }

    // Regular path: materialize Mats and call the pointer-based forward().
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    // Legacy interface expects pointers to the input Mats.
    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
3401
3402
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
3403
{
3404
CV_TRACE_FUNCTION();
3405
3406
this->finalize(inputs, outputs);
3407
this->forward(inputs, outputs, internals);
3408
}
3409
3410
Layer::~Layer() {}
3411
3412
bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
3413
const int requiredOutputs,
3414
std::vector<MatShape> &outputs,
3415
std::vector<MatShape> &internals) const
3416
{
3417
CV_Assert(inputs.size());
3418
outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
3419
return false;
3420
}
3421
3422
//////////////////////////////////////////////////////////////////////////
3423
3424
// Lazily creates the mutex guarding the layer-factory registry.
// Double-checked creation: the first unsynchronized read is a fast path;
// the authoritative check happens under the global initialization mutex.
// The mutex is intentionally never deleted (lives for process lifetime).
static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}
3435
3436
// Registry type: layer type name (lowercased) -> stack of registered
// constructors. The most recently registered constructor (back()) wins.
typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;

// Holds the actual registry storage (function-local static singleton).
static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}
3443
3444
// Returns the layer registry, running initializeLayerFactory() exactly
// once on first access. Double-checked: the unsynchronized first read is
// a fast path; the real check happens under the factory mutex.
static LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            // Registers all built-in layer types into the registry.
            initializeLayerFactory();
        }
    }
    return *instance;
}
3458
3459
void LayerFactory::registerLayer(const String &type, Constructor constructor)
3460
{
3461
CV_TRACE_FUNCTION();
3462
CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3463
3464
cv::AutoLock lock(getLayerFactoryMutex());
3465
String type_ = toLowerCase(type);
3466
LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3467
3468
if (it != getLayerFactoryImpl().end())
3469
{
3470
if (it->second.back() == constructor)
3471
CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
3472
it->second.push_back(constructor);
3473
}
3474
getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
3475
}
3476
3477
void LayerFactory::unregisterLayer(const String &type)
3478
{
3479
CV_TRACE_FUNCTION();
3480
CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3481
3482
cv::AutoLock lock(getLayerFactoryMutex());
3483
String type_ = toLowerCase(type);
3484
3485
LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3486
if (it != getLayerFactoryImpl().end())
3487
{
3488
if (it->second.size() > 1)
3489
it->second.pop_back();
3490
else
3491
getLayerFactoryImpl().erase(it);
3492
}
3493
}
3494
3495
// Instantiates a layer of the given type (case-insensitive) using the
// most recently registered constructor. Returns a null Ptr for unknown
// types.
Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    const String key = toLowerCase(type);
    LayerFactory_Impl::const_iterator entry = getLayerFactoryImpl().find(key);

    if (entry == getLayerFactoryImpl().end())
        return Ptr<Layer>();  // NULL: no constructor registered

    CV_Assert(!entry->second.empty());
    return entry->second.back()(params);
}
3514
3515
BackendNode::BackendNode(int backendId) : backendId(backendId) {}
3516
3517
BackendNode::~BackendNode() {};
3518
3519
BackendWrapper::BackendWrapper(int backendId, int targetId)
3520
: backendId(backendId), targetId(targetId) {}
3521
3522
// The Mat-wrapping and reshaping constructors must be provided by every
// concrete backend; the base class only reports the missing override.
BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}
3535
3536
// Reads a network from file(s), deducing the origin framework from the
// explicit 'framework' hint or from the model/config file extensions.
// Inside each branch, (model, config) are swapped when the caller passed
// them in the opposite order, so either argument order is accepted.
Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = toLowerCase(_framework);
    String model = _model;
    String config = _config;
    // If a name has no '.', rfind returns npos and npos + 1 == 0, so the
    // "extension" degrades to the whole string.
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
        modelExt == "prototxt" || configExt == "prototxt")
    {
        // readNetFromCaffe expects (prototxt, caffemodel).
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
        modelExt == "pbtxt" || configExt == "pbtxt")
    {
        // readNetFromTensorflow expects (pb, pbtxt).
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
        configExt == "t7" || configExt == "net")
    {
        // Torch models are a single file; accept it in either argument.
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
        modelExt == "cfg" || configExt == "cfg")
    {
        // readNetFromDarknet expects (cfg, weights).
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
        modelExt == "xml" || configExt == "xml")
    {
        // readNetFromModelOptimizer expects (xml, bin).
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
                              model + (config.empty() ? "" : ", " + config));
}
3583
3584
// In-memory variant of readNet(): chooses a loader from the explicit
// framework name alone (file extensions are unavailable for buffers).
Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
            const std::vector<uchar>& bufferConfig)
{
    const String framework = toLowerCase(_framework);
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    if (framework == "dldt")
        CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}
3600
3601
// Thin wrapper around Net::readFromModelOptimizer().
Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}
3605
3606
CV__DNN_INLINE_NS_END
3607
}} // namespace
3608
3609