Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/dnn.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
15
//
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
18
//
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
21
//
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
25
//
26
// * The name of the copyright holders may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
28
//
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
39
//
40
//M*/
41
42
#include "precomp.hpp"
43
#include "op_halide.hpp"
44
#include "op_inf_engine.hpp"
45
#include "op_vkcom.hpp"
46
#include "halide_scheduler.hpp"
47
#include <set>
48
#include <algorithm>
49
#include <iostream>
50
#include <sstream>
51
#include <iterator>
52
#include <numeric>
53
#include <opencv2/dnn/shape_utils.hpp>
54
#include <opencv2/imgproc.hpp>
55
56
#include <opencv2/core/utils/configuration.private.hpp>
57
#include <opencv2/core/utils/logger.hpp>
58
59
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

// this option is useful to run valgrind memory errors detection
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);

#ifdef HAVE_OPENCL
// Allows the OpenCL target to run on devices outside the default whitelist.
static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
#endif

// Backend substituted for DNN_BACKEND_DEFAULT; configurable through the
// OPENCV_DNN_BACKEND_DEFAULT environment variable. Falls back to the
// Inference Engine backend when OpenCV was built with it, OpenCV otherwise.
static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
    (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
    (size_t)DNN_BACKEND_OPENCV
#endif
);

// Additional checks (slowdowns execution!)
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);

using std::vector;
using std::map;
using std::make_pair;
using std::set;
87
88
namespace
89
{
90
typedef std::vector<MatShape> ShapesVec;
91
92
struct LayerShapes
93
{
94
ShapesVec in, out, internal;
95
// No guarantees that layer which support in-place computations
96
// will be computed in-place (input.data_ptr == output.data_ptr).
97
// If layer said that it could work in-place and layers after it
98
// no longer use input blob, we'll set output = input.
99
bool supportInPlace;
100
LayerShapes() {supportInPlace = false;}
101
};
102
}
103
104
// Convenience overload of blobFromImage(): allocates the blob internally and
// returns it by value instead of filling a caller-provided OutputArray.
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                  const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat result;
    blobFromImage(image, result, scalefactor, size, mean, swapRB, crop, ddepth);
    return result;
}
112
113
// Wraps a single image into a one-element batch and delegates the real work
// to blobFromImages().
void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
                   const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> batch;
    batch.push_back(image.getMat());
    blobFromImages(batch, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}
120
121
// Convenience overload of blobFromImages(): allocates and returns the blob
// by value instead of writing through an OutputArray parameter.
Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat result;
    blobFromImages(images, result, scalefactor, size, mean, swapRB, crop, ddepth);
    return result;
}
129
130
// Converts a batch of 2D images into a single 4D NCHW blob. Each image is
// optionally resized (or resized-and-center-cropped) to 'size', converted
// from 8U to 32F when ddepth == CV_32F, mean-subtracted and scaled.
// NOTE: the images obtained from 'images_' are modified in place during
// preprocessing.
void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    if (ddepth == CV_8U)
    {
        // An 8-bit blob cannot represent scaled or mean-subtracted values.
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (int i = 0; i < images.size(); i++)
    {
        Size imgSize = images[i].size();
        if (size == Size())
            size = imgSize;  // no target size given: lock onto the first image's size
        if (size != imgSize)
        {
            if(crop)
            {
                // Scale so the image fully covers the target size, then take
                // a centered crop of exactly 'size'.
                float resizeFactor = std::max(size.width / (float)imgSize.width,
                                              size.height / (float)imgSize.height);
                resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                Rect crop(Point(0.5 * (images[i].cols - size.width),
                                0.5 * (images[i].rows - size.height)),
                          size);
                images[i] = images[i](crop);
            }
            else
                resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
        }
        if(images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);
        Scalar mean = mean_;
        if (swapRB)
            std::swap(mean[0], mean[2]);  // keep mean channel order consistent with the R/B swap below

        images[i] -= mean;
        images[i] *= scalefactor;
    }

    size_t i, nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    Mat image;
    if (nch == 3 || nch == 4)
    {
        // Multi-channel case: split each image into per-channel planes that
        // alias the blob's memory directly, so split() writes in place.
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for( i = 0; i < nimages; i++ )
        {
            image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for( int j = 0; j < nch; j++ )
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if(swapRB)
                std::swap(ch[0], ch[2]);  // channels 0 and 2 land in each other's planes
            split(image, ch);
        }
    }
    else
    {
        // Single-channel case: copy each image into its plane of the blob.
        CV_Assert(nch == 1);
        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for( i = 0; i < nimages; i++ )
        {
            Mat image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 1));
            CV_Assert(image.size() == image0.size());

            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
        }
    }
}
220
221
// Splits a 4D NCHW blob (CV_32F) back into one 2D image per batch item by
// merging the per-channel planes of each item into a single multi-channel Mat.
void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
    CV_TRACE_FUNCTION();

    // Blob layout: size[0] = batch, size[1] = channels,
    //              size[2] = height, size[3] = width.
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    const int numImages = blob_.size[0];
    const int numChannels = blob_.size[1];

    images_.create(cv::Size(1, numImages), blob_.depth());

    std::vector<Mat> channels(numChannels);
    for (int n = 0; n < numImages; ++n)
    {
        for (int c = 0; c < numChannels; ++c)
            channels[c] = getPlane(blob_, n, c);
        cv::merge(channels, images_.getMatRef(n));
    }
}
245
246
class OpenCLBackendWrapper : public BackendWrapper
247
{
248
public:
249
OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
250
{
251
m.copyTo(umat);
252
host = &m;
253
hostDirty = false;
254
}
255
256
OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
257
: BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
258
{
259
Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
260
CV_Assert(!base.empty());
261
262
host = &m;
263
264
int shape[] = {1, (int)base->umat.total()};
265
umat = base->umat.reshape(1, 2, &shape[0])
266
.colRange(0, host->total())
267
.reshape(1, host->dims, &host->size[0]);
268
hostDirty = false;
269
}
270
271
static Ptr<BackendWrapper> create(Mat& m)
272
{
273
return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
274
}
275
276
static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
277
{
278
return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
279
}
280
281
static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
282
{
283
const int numWrappers = wrappers.size();
284
std::vector<UMat> mats(wrappers.size());
285
for (int i = 0; i < numWrappers; ++i)
286
{
287
Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
288
CV_Assert(!umatWrapper.empty());
289
umatWrapper->copyToDevice();
290
mats[i] = umatWrapper->umat;
291
}
292
return mats;
293
}
294
295
// Replaces all umats in wrappers to specific ones.
296
static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
297
const std::vector<UMat>& umats)
298
{
299
CV_Assert(wrappers.size() == umats.size());
300
for (int i = 0, n = umats.size(); i < n; ++i)
301
{
302
Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
303
CV_Assert(!umatWrapper.empty());
304
umatWrapper->umat = umats[i];
305
}
306
}
307
308
~OpenCLBackendWrapper() {}
309
310
// Copies data from device to a host memory.
311
virtual void copyToHost() CV_OVERRIDE
312
{
313
umat.copyTo(*host);
314
}
315
316
virtual void setHostDirty() CV_OVERRIDE
317
{
318
hostDirty = true;
319
};
320
321
void copyToDevice()
322
{
323
if (hostDirty)
324
{
325
host->copyTo(umat);
326
hostDirty = false;
327
}
328
}
329
330
private:
331
UMat umat;
332
Mat* host;
333
bool hostDirty;
334
};
335
336
// Identifies one output blob of one layer: 'lid' is the layer id and 'oid'
// is the index of the output within that layer. Used as an ordered map key
// by the blob-reuse bookkeeping.
struct LayerPin
{
    int lid;
    int oid;

    LayerPin(int layerId = -1, int outputId = -1)
        : lid(layerId), oid(outputId) {}

    // True when both ids refer to an actual layer output (non-negative).
    bool valid() const
    {
        return (lid >= 0 && oid >= 0);
    }

    bool equal(const LayerPin &r) const
    {
        return (lid == r.lid && oid == r.oid);
    }

    // Lexicographic ordering by (lid, oid). Parentheses added around the
    // '&&' term: '&&' already binds tighter than '||', so behavior is
    // unchanged, but the explicit grouping silences -Wparentheses and makes
    // the intent obvious.
    bool operator<(const LayerPin &r) const
    {
        return lid < r.lid || (lid == r.lid && oid < r.oid);
    }

    bool operator ==(const LayerPin &r) const
    {
        return lid == r.lid && oid == r.oid;
    }
};
364
365
// Per-layer bookkeeping used by Net::Impl: identity, parameters, graph
// connectivity, blob storage and backend-specific compiled nodes.
struct LayerData
{
    LayerData() : id(-1), skip(false), flag(0) {}
    LayerData(int _id, const String &_name, const String &_type, LayerParams &_params)
        : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0)
    {
        CV_TRACE_FUNCTION();

        //add logging info
        params.name = name;
        params.type = type;
    }

    int id;              // layer id inside the network (0 is the fake input layer)
    String name;
    String type;
    LayerParams params;

    std::vector<LayerPin> inputBlobsId;  // pins of the blobs this layer consumes
    std::set<int> inputLayersId;         // ids of layers feeding this one
    std::set<int> requiredOutputs;
    std::vector<LayerPin> consumers;     // pins of layers consuming this layer's outputs
    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
    std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;

    Ptr<Layer> layerInstance;            // lazily created by getLayerInstance()
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode> > backendNodes;
    // Flag for skip layer computation for specific backend.
    bool skip;

    int flag;

    // Creates the Layer implementation through LayerFactory on first use and
    // caches it; throws StsError when no factory is registered for 'type'.
    Ptr<Layer> getLayerInstance()
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        if (layerInstance)
            return layerInstance;

        layerInstance = LayerFactory::createLayerInstance(type, params);
        if (!layerInstance)
        {
            CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
        }

        return layerInstance;
    }
};
419
420
//fake layer containing network input blobs
struct DataLayer : public Layer
{
    DataLayer() : Layer()
    {
        skip = false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // NOTE: '&&' binds tighter than '||', so the Inference Engine backend
        // is only reported when the network has exactly one input.
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1;
    }

    // Applies per-input preprocessing (scale and mean subtraction) while
    // copying inputsData into the output blobs.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // fp16 outputs are delegated to the generic fallback implementation.
        if (outputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        // Supported modes:
        // | Input type | Output type |
        // |    fp32    |    fp32     |
        // |    uint8   |    fp32     |
        for (int i = 0; i < inputsData.size(); ++i)
        {
            double scale = scaleFactors[i];
            Scalar& mean = means[i];
            // Per-channel mean is only supported for up to 4 channels.
            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            // If every channel shares the same mean, one convertTo() suffices.
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
            }
            else
            {
                // Different per-channel means: convert plane by plane.
                for (int n = 0; n < inputsData[i].size[0]; ++n)
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                    {
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                    }
            }
        }
    }

#ifdef HAVE_OPENCL
    // Keeps the Mats produced by matrix expressions alive until convertFp16()
    // has consumed them (the expressions below create temporaries).
    std::vector<Mat> tmp_expressions;
    // OpenCL flavor of forward(): same preprocessing, plus an fp32 -> fp16
    // conversion when the output blobs are half precision.
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        // Supported modes:
        // | Input type | Output type |
        // |    fp32    |    fp32     |
        // |    fp32    |    fp16     |
        // |    uint8   |    fp32     |
        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        tmp_expressions.clear();
        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            // If every channel shares the same mean, a single expression suffices.
            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (outputs_.depth() == CV_16S)
            {
                // Half-precision outputs: compute in fp32, then convert.
                if (singleMean)
                {
                    tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0])));
                    convertFp16(tmp_expressions.back(), outputs[i]);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            // View of the (n, c) plane inside the output blob.
                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            tmp_expressions.push_back(scale * (inp - mean[c]));
                            convertFp16(tmp_expressions.back(), out);
                        }
                }
            }
            else
            {
                CV_Assert(outputs_.depth() == CV_32F);
                if (singleMean)
                {
                    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
                else
                {
                    for (int n = 0; n < inputsData[i].size[0]; ++n)
                        for (int c = 0; c < inputsData[i].size[1]; ++c)
                        {
                            Mat inp = getPlane(inputsData[i], n, c);

                            // View of the (n, c) plane inside the output blob.
                            std::vector<cv::Range> plane(4, Range::all());
                            plane[0] = Range(n, n + 1);
                            plane[1] = Range(c, c + 1);
                            UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                }
            }
        }
        return true;
    }
#endif

    // Returns the index of 'tgtName' inside outNames, or -1 when absent.
    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
        int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
        return (idx < (int)outNames.size()) ? idx : -1;
    }

    void setNames(const std::vector<String> &names)
    {
        outNames.assign(names.begin(), names.end());
    }

    // The input layer forwards its inputs: output shapes equal input shapes.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == requiredOutputs);
        outputs.assign(inputs.begin(), inputs.end());
        return false;
    }

    // The whole layer can be skipped when every output already aliases its
    // input and no scaling / mean subtraction was requested.
    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                    inputsData.size() == outputs.size());
        skip = true;
        for (int i = 0; skip && i < inputsData.size(); ++i)
        {
            if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
                skip = false;
        }
    }

    // Expresses the preprocessing as an Inference Engine ScaleShift layer:
    // weights = scale factor, biases = -mean * scale. Only networks with a
    // single 4D input (<= 4 channels) are supported here.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::LayerParams lp;
        lp.name = name;
        lp.type = "ScaleShift";
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));

        CV_CheckEQ(inputsData.size(), (size_t)1, "");
        CV_CheckEQ(inputsData[0].dims, 4, "");
        const size_t numChannels = inputsData[0].size[1];
        CV_Assert(numChannels <= 4);

        // Scale
        auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
                                                                {numChannels});
        weights->allocate();
        weights->set(std::vector<float>(numChannels, scaleFactors[0]));
        ieLayer->_weights = weights;

        // Mean subtraction
        auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
                                                               {numChannels});
        biases->allocate();
        std::vector<float> biasesVec(numChannels);
        for (int i = 0; i < numChannels; ++i)
        {
            biasesVec[i] = -means[0][i] * scaleFactors[0];
        }
        biases->set(biasesVec);
        ieLayer->_biases = biases;

        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }

    std::vector<String> outNames;
    // Preprocessing parameters for each network's input.
    std::vector<double> scaleFactors;
    std::vector<Scalar> means;
    std::vector<Mat> inputsData;
    bool skip;  // true when forward() would be a no-op (set by finalize())
};
646
647
// Tracks reference counts on layer output blobs and reuses the memory of
// blobs whose consumers have all finished, reducing peak memory usage.
// Disabled globally via OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS.
struct BlobManager
{
public:
    // Increase references counter to layer output.
    void addReference(const LayerPin& lp)
    {
        std::map<LayerPin, int>::iterator it = refCounter.find(lp);
        if (it == refCounter.end())
            refCounter[lp] = 1;
        else
            it->second += 1;
    }

    void addReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            addReference(pins[i]);
        }
    }

    // Returns number of references to allocated memory that used in specific
    // layer blob.
    int numReferences(const LayerPin& lp)
    {
        // Resolve the pin to the blob that actually owns the memory.
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());
        LayerPin memHost = mapIt->second;

        std::map<LayerPin, int>::iterator refIt = refCounter.find(memHost);
        CV_Assert(refIt != refCounter.end());
        return refIt->second;
    }

    // Reuse data allocated in <host> inside the <user> blob.
    void reuse(const LayerPin& host, const LayerPin& user)
    {
        CV_Assert(reuseMap.find(user) == reuseMap.end());
        CV_Assert(reuseMap.find(host) != reuseMap.end());
        LayerPin memHost = reuseMap[host];
        reuseMap[user] = memHost;
        if (refCounter.find(memHost) != refCounter.end())
        {
            // Fold the user's own reference count into the memory owner's.
            std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
            if (userRefIt != refCounter.end())
            {
                refCounter[memHost] += userRefIt->second;
                refCounter.erase(userRefIt);
            }
            else
                refCounter[memHost] += 1;
        }
    }

    // Decrease references counter to allocated memory inside specific blob.
    void releaseReference(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());

        std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
        CV_Assert(refIt != refCounter.end());
        CV_Assert(refIt->second > 0);
        refIt->second -= 1;
    }

    void releaseReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            releaseReference(pins[i]);
        }
    }

    // Binds 'dst' either to an existing unreferenced blob of sufficient size
    // (smallest fit wins) or, failing that, to freshly allocated memory.
    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
    {
        if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
        {
            Mat bestBlob;
            LayerPin bestBlobPin;

            std::map<LayerPin, Mat>::iterator hostIt;
            std::map<LayerPin, int>::iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that had references before because if not,
                // it might be used as output.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    Mat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal &&
                        unusedBlob.total() < bestBlobTotal)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
            if (!bestBlob.empty())
            {
                // Reuse found: expose the leading targetTotal elements of the
                // donor blob, reshaped to the requested shape.
                reuse(bestBlobPin, lp);
                dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
                return;
            }
        }

        {
            // if dst already has been allocated with total(shape) elements,
            // it won't be recreated and pointer of dst.data remains the same.
            dst.create(shape, use_half ? CV_16S : CV_32F);
            addHost(lp, dst);
        }
    }

    // Allocates (or reuses memory for) the output and internal blobs of 'ld'
    // according to 'layerShapes', processing the biggest blobs first. Fills
    // 'pinsForInternalBlobs' with pins for the non-empty internal blobs.
    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                               std::vector<LayerPin>& pinsForInternalBlobs,
                               bool use_half = false)
    {
        CV_TRACE_FUNCTION();

        pinsForInternalBlobs.clear();

        std::vector<Mat>& outputBlobs = ld.outputBlobs,
                &internalBlobs = ld.internals;

        const ShapesVec& outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
        internalBlobs.resize(internalShapes.size());

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

        // Check that layer could work in-place.
        bool inPlace = false;
        if (layerShapes.supportInPlace)
        {
            if (ld.inputBlobs.size() == 1)
            {
                // Get number of references to the input memory.
                int numRef = numReferences(ld.inputBlobsId[0]);
                // If current layer is one and only customer of this blob.
                inPlace = numRef == 1;
            }
        }

        // Output shapes first, then internal shapes; 'blobs' holds matching
        // destination pointers in the same order.
        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        for(int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
        }

        for(int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
            }
        }

        addReferences(pinsForInternalBlobs);

        // Group blob indices by element count so allocation proceeds from the
        // largest to the smallest (better reuse of large donor buffers).
        std::map<int, std::vector<int> > idxSizes;
        for(int i = 0; i < shapes.size(); i++)
        {
            idxSizes[total(shapes[i])].push_back(i);
        }

        std::map<int, std::vector<int> >::reverse_iterator it;
        for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for(int j = 0; j < it->second.size(); j++)
            {
                int index = it->second[j];
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
                    if (index < outShapes.size() && inPlace)
                    {
                        // In-place: the output is a reshaped view of the input.
                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                        reuse(ld.inputBlobsId[0], blobPin);
                    }
                    else
                        reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
                }
            }
        }
    }

    // Clear internal state. Calls before an every reallocation.
    void reset()
    {
        CV_TRACE_FUNCTION();

        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
    }

private:
    // Register allocated memory.
    void addHost(const LayerPin& lp, const Mat& mat)
    {
        CV_Assert(memHosts.find(lp) == memHosts.end());
        reuseMap[lp] = lp;
        memHosts[lp] = mat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps pin to origin blob (for whom memory was allocated firstly).
    // For origin blobs key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
};
871
872
// Creates a fresh backend-specific wrapper around the host Mat 'm'.
// Returns an empty Ptr for the plain CPU path (which uses Mat directly) and
// when a backend's support was not compiled in (the #ifdef'd returns fall
// through to the final 'return Ptr<BackendWrapper>()').
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
    if (backendId == DNN_BACKEND_OPENCV)
    {
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);
        else
            CV_Error(Error::StsNotImplemented, "Unknown target identifier");
    }
    else if (backendId == DNN_BACKEND_HALIDE)
    {
        CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif  // HAVE_HALIDE
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
    {
        CV_Assert(haveInfEngine());
#ifdef HAVE_INF_ENGINE
        return Ptr<BackendWrapper>(new InfEngineBackendWrapper(targetId, m));
#endif  // HAVE_INF_ENGINE
    }
    else if (backendId == DNN_BACKEND_VKCOM)
    {
        CV_Assert(haveVulkan());
#ifdef HAVE_VULKAN
        return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
#endif  // HAVE_VULKAN
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>();
}
908
909
struct Net::Impl
910
{
911
typedef std::map<int, LayerShapes> LayersShapesMap;
912
typedef std::map<int, LayerData> MapIdToLayerData;
913
914
// Destructor: with Vulkan, the device's child objects must be released
// explicitly before the device itself, hence the ordered clears below.
~Impl()
{
#ifdef HAVE_VULKAN
    // Vulkan requires explicit releasing the child objects of
    // VkDevice object prior to releasing VkDevice object itself.
    layers.clear();
    backendWrappers.clear();
    vkcom::deinitPerThread();
#endif
}
924
// Constructor: registers the implicit input layer (id 0, name "_input")
// and initializes default backend/target settings.
Impl()
{
#ifdef HAVE_VULKAN
    vkcom::initPerThread();
#endif
    //allocate fake net input layer
    netInputLayer = Ptr<DataLayer>(new DataLayer());
    LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
    inpl.id = 0;
    netInputLayer->name = inpl.name = "_input";
    inpl.type = "__NetInputLayer__";
    inpl.layerInstance = netInputLayer;
    layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

    lastLayerId = 0;
    netWasAllocated = false;
    fusion = true;
    preferableBackend = DNN_BACKEND_DEFAULT;  // resolved to a concrete backend in setUpNet()
    preferableTarget = DNN_TARGET_CPU;
    skipInfEngineInit = false;
}
945
946
Ptr<DataLayer> netInputLayer;        // fake layer holding the network inputs (id 0)
std::vector<LayerPin> blobsToKeep;   // outputs that must survive blob-memory reuse
MapIdToLayerData layers;             // all layers, keyed by layer id
std::map<String, int> layerNameToId;
BlobManager blobManager;
int preferableBackend;
int preferableTarget;
String halideConfigFile;             // optional scheduling config for the Halide backend
bool skipInfEngineInit;
// Map host data to backend specific wrapper.
std::map<void*, Ptr<BackendWrapper> > backendWrappers;

int lastLayerId;

bool netWasAllocated;                // true once setUpNet() has completed
bool fusion;                         // layer-fusion optimization enabled
std::vector<int64> layersTimings;
Mat output_blob;
964
965
// Returns a backend/target-specific wrapper for 'host'. Wrappers are cached
// per data pointer: the first request for a given buffer allocates and is
// cached; later requests for the same data share the cached base buffer.
Ptr<BackendWrapper> wrap(Mat& host)
{
    if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
        return Ptr<BackendWrapper>();  // plain CPU path uses Mat directly

    MatShape shape(host.dims);
    for (int i = 0; i < host.dims; ++i)
        shape[i] = host.size[i];

    void* data = host.data;
    if (backendWrappers.find(data) != backendWrappers.end())
    {
        // A wrapper for this memory already exists: create a view on top of it.
        Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
        if (preferableBackend == DNN_BACKEND_OPENCV)
        {
            CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
            return OpenCLBackendWrapper::create(baseBuffer, host);
        }
        else if (preferableBackend == DNN_BACKEND_HALIDE)
        {
            CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
            return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif  // HAVE_HALIDE
        }
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
        {
            // IE wrappers are recreated rather than derived from the base buffer.
            return wrapMat(preferableBackend, preferableTarget, host);
        }
        else if (preferableBackend == DNN_BACKEND_VKCOM)
        {
#ifdef HAVE_VULKAN
            return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
#endif
        }
        else
            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    }

    // First wrapper for this memory: create and cache it.
    Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
    backendWrappers[data] = wrapper;
    return wrapper;
}
1008
1009
#ifdef HAVE_HALIDE
1010
// Schedules and compiles every Halide node in the network. Each layer gets a
// schedule from the user-supplied config file when one matches, otherwise the
// layer's automatic scheduler; compilation itself is spread over a small pool
// of worker threads (including the calling thread).
void compileHalide()
{
    CV_TRACE_FUNCTION();

    CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

    HalideScheduler scheduler(halideConfigFile);
    std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
    {
        LayerData &ld = it->second;
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
        {
            CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
            bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
            if (!scheduled)
            {
                // Use automatic scheduling provided by layer.
                layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                            ld.inputBlobs, ld.outputBlobs,
                                            preferableTarget);
            }
            compileList.emplace_back(ld);
        }
    }
    // Worker body: atomically claims the next layer index until the list is
    // drained; safe to run concurrently from several threads.
    std::atomic<int> progress(0);
    auto fn = ([&] () -> void
    {
        for (;;)
        {
            int id = progress.fetch_add(1);
            if ((size_t)id >= compileList.size())
                return;
            const LayerData& ld = compileList[id].get();
            Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
            dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
        }
    });
    // Clamp worker count to [1, 8]; the current thread also runs 'fn', so
    // only num_threads - 1 extra threads are spawned.
    size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
    num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
    std::vector<std::thread> threads(num_threads - 1);
    for (auto& t: threads) t = std::thread(fn);
    fn(); // process own tasks
    for (auto& t: threads) t.join();
}
1056
#endif
1057
1058
void clear()
1059
{
1060
CV_TRACE_FUNCTION();
1061
1062
MapIdToLayerData::iterator it;
1063
for (it = layers.begin(); it != layers.end(); it++)
1064
{
1065
if (it->second.id != 0) {
1066
it->second.inputBlobs.clear();
1067
it->second.outputBlobs.clear();
1068
it->second.internals.clear();
1069
}
1070
it->second.skip = false;
1071
//it->second.consumers.clear();
1072
Ptr<Layer> currLayer = it->second.layerInstance;
1073
1074
if( currLayer.empty() )
1075
continue;
1076
1077
currLayer->unsetAttached();
1078
1079
Ptr<PoolingLayer> poolingLayer = currLayer.dynamicCast<PoolingLayer>();
1080
if( !poolingLayer.empty() )
1081
{
1082
poolingLayer->computeMaxIdx = true;
1083
}
1084
}
1085
1086
layersTimings.clear();
1087
}
1088
1089
    // (Re)allocates the whole network when needed: validates the requested
    // backend/target pair, downgrades to a safe target when the requested one
    // is unavailable in this build/device, then clears previous state,
    // allocates all layers and initializes the chosen backend.
    // blobsToKeep_ lists pins whose output blobs must not be recycled by the
    // blob manager (e.g. blobs requested by the user).
    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
    {
        CV_TRACE_FUNCTION();

        if (preferableBackend == DNN_BACKEND_DEFAULT)
            preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT;

        // Each backend supports only a subset of targets; fail fast on
        // combinations that can never work.
        CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16);
        CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL);
        CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE ||
                  preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL ||
                  preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                  preferableTarget == DNN_TARGET_MYRIAD);
        CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
                  preferableTarget == DNN_TARGET_VULKAN);
        // Re-setup only when nothing was allocated yet or the kept-blob set changed.
        if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
        {
            // OpenCL sanity fallbacks: the braces below belong to the same `if`
            // but their contents are chosen at compile time.
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
#ifndef HAVE_OPENCL
            {
                CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
                preferableTarget = DNN_TARGET_CPU;
            }
#else
            {
                if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                {
                    // Current implementation is only valid for GPU (#11494)
                    if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
                    {
                        CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
                        preferableTarget = DNN_TARGET_CPU;
                    }
                    else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
                    {
                        CV_LOG_WARNING(NULL,
                                       "DNN: OpenCL target with fp16 precision is not supported "
                                       "with current OpenCL device (tested with Intel GPUs only), "
                                       "switching to OpenCL with fp32 precision.");
                        preferableTarget = DNN_TARGET_OPENCL;
                    }
                }
            }
#endif
            // Vulkan requested but unusable at runtime: fall back to default CPU path.
            if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
            {
                preferableBackend = DNN_BACKEND_OPENCV;
                preferableTarget = DNN_TARGET_CPU;
            }

            clear();

            allocateLayers(blobsToKeep_);

            // Propagate the input layer's skip flag to its LayerData entry (id 0).
            MapIdToLayerData::iterator it = layers.find(0);
            CV_Assert(it != layers.end());
            it->second.skip = netInputLayer->skip;

            initBackend();

            if (!netWasAllocated )
            {
#ifdef HAVE_HALIDE
                if (preferableBackend == DNN_BACKEND_HALIDE)
                    compileHalide();
#else
                CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
#endif
            }

            netWasAllocated = true;
            this->blobsToKeep = blobsToKeep_;
        }
    }
1169
1170
int getLayerId(const String &layerName)
1171
{
1172
std::map<String, int>::iterator it = layerNameToId.find(layerName);
1173
return (it != layerNameToId.end()) ? it->second : -1;
1174
}
1175
1176
int getLayerId(int id)
1177
{
1178
MapIdToLayerData::iterator it = layers.find(id);
1179
return (it != layers.end()) ? id : -1;
1180
}
1181
1182
int getLayerId(DictValue &layerDesc)
1183
{
1184
if (layerDesc.isInt())
1185
return getLayerId(layerDesc.get<int>());
1186
else if (layerDesc.isString())
1187
return getLayerId(layerDesc.get<String>());
1188
1189
CV_Assert(layerDesc.isInt() || layerDesc.isString());
1190
return -1;
1191
}
1192
1193
String getLayerName(int id)
1194
{
1195
MapIdToLayerData::iterator it = layers.find(id);
1196
return (it != layers.end()) ? it->second.name : "(unknown layer)";
1197
}
1198
1199
LayerData& getLayerData(int id)
1200
{
1201
MapIdToLayerData::iterator it = layers.find(id);
1202
1203
if (it == layers.end())
1204
CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
1205
1206
return it->second;
1207
}
1208
1209
LayerData& getLayerData(const String &layerName)
1210
{
1211
int id = getLayerId(layerName);
1212
1213
if (id < 0)
1214
CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");
1215
1216
return getLayerData(id);
1217
}
1218
1219
LayerData& getLayerData(const DictValue &layerDesc)
1220
{
1221
CV_Assert(layerDesc.isInt() || layerDesc.isString());
1222
if (layerDesc.isInt())
1223
return getLayerData(layerDesc.get<int>());
1224
else /*if (layerDesc.isString())*/
1225
return getLayerData(layerDesc.get<String>());
1226
}
1227
1228
static void addLayerInput(LayerData &ld, int inNum, LayerPin from)
1229
{
1230
if ((int)ld.inputBlobsId.size() <= inNum)
1231
{
1232
ld.inputBlobsId.resize(inNum + 1);
1233
}
1234
else
1235
{
1236
LayerPin storedFrom = ld.inputBlobsId[inNum];
1237
if (storedFrom.valid() && !storedFrom.equal(from))
1238
CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected",
1239
inNum, ld.name.c_str()));
1240
}
1241
1242
ld.inputBlobsId[inNum] = from;
1243
}
1244
1245
int resolvePinOutputName(LayerData &ld, const String &outName)
1246
{
1247
if (outName.empty())
1248
return 0;
1249
return ld.getLayerInstance()->outputNameToIndex(outName);
1250
}
1251
1252
LayerPin getPinByAlias(const String &layerName)
1253
{
1254
LayerPin pin;
1255
pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1256
1257
if (pin.lid >= 0)
1258
pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
1259
1260
return pin;
1261
}
1262
1263
std::vector<LayerPin> getLayerOutPins(const String &layerName)
1264
{
1265
int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
1266
1267
std::vector<LayerPin> pins;
1268
1269
for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
1270
{
1271
pins.push_back(LayerPin(lid, i));
1272
}
1273
1274
return pins;
1275
}
1276
1277
    // Wires output pin (outLayerId, outNum) into input slot inNum of layer
    // inLayerId, and records the dependency on both sides. Layers must
    // already be in topological order: producer id < consumer id.
    void connect(int outLayerId, int outNum, int inLayerId, int inNum)
    {
        CV_Assert(outLayerId < inLayerId);
        LayerData &ldOut = getLayerData(outLayerId);
        LayerData &ldInp = getLayerData(inLayerId);

        addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
        ldOut.requiredOutputs.insert(outNum);
        // NOTE(review): the consumer pin is built with outNum (the producer's
        // output index), not inNum — presumably only the .lid field of
        // consumers is relied upon downstream; confirm before changing.
        ldOut.consumers.push_back(LayerPin(inLayerId, outNum));
    }
1287
1288
void initBackend()
1289
{
1290
CV_TRACE_FUNCTION();
1291
if (preferableBackend == DNN_BACKEND_OPENCV)
1292
CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
1293
else if (preferableBackend == DNN_BACKEND_HALIDE)
1294
initHalideBackend();
1295
else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
1296
initInfEngineBackend();
1297
else if (preferableBackend == DNN_BACKEND_VKCOM)
1298
initVkComBackend();
1299
else
1300
CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
1301
}
1302
1303
    // Creates Halide backend nodes for all supported layers and greedily fuses
    // chains of in-place layers (e.g. conv+bn+relu) into a single node by
    // attaching each in-place layer onto the node of its "base" layer.
    void initHalideBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());

        // Iterator to current layer.
        MapIdToLayerData::iterator it = layers.begin();
        // Iterator to base layer for fusion. In example, in case of conv+bn+relu
        // it'll be a conv layer.
        MapIdToLayerData::iterator baseIt = layers.begin();
        for (; it != layers.end(); it++)
        {
            LayerData &ldTop = it->second;
            Ptr<Layer> layerTop = ldTop.layerInstance;
            if (!layerTop->supportBackend(preferableBackend))
            {
                // Move base iterator to layer that don't support preferable
                // backend to prevent fusion over layer of different backend.
                baseIt = it;
                continue;
            }
            // Try to do layers fusion.
            LayerData &ldBot = baseIt->second;
            Ptr<Layer> layerBot = ldBot.layerInstance;
            // 1. Check that bottom and top from the same backends.
            if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
            {
                // 2. Check that current layer works in-place: one input whose
                // data pointer is the base layer's single output.
                bool inPlace = ldTop.inputBlobs.size() == 1 &&
                               ldBot.outputBlobs.size() == 1 &&
                               ldTop.inputBlobs[0]->data ==
                               ldBot.outputBlobs[0].data;
                if (inPlace)
                {
                    // 3. Try to attach node.
                    CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
                    Ptr<BackendNode> fusedNode =
                        layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
                    if (!fusedNode.empty())
                    {
                        // Fusion succeeded: the top layer is skipped and the base
                        // node now produces the top layer's outputs.
                        ldTop.skip = true;
                        ldBot.backendNodes[preferableBackend] = fusedNode;
                        ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
                        continue;
                    }
                }
            }
            // No layers fusion: create a fresh node and make this layer the new base.
            ldTop.skip = false;
            ldTop.backendNodes[DNN_BACKEND_HALIDE] =
                layerTop->initHalide(ldTop.inputBlobsWrappers);
            baseIt = it;
        }
    }
1357
1358
#ifdef HAVE_INF_ENGINE
1359
// Before launching Inference Engine graph we need to specify output blobs.
1360
// This function requests output blobs based on inputs references of
1361
// layers from default backend or layers from different graphs.
1362
void addInfEngineNetOutputs(LayerData &ld)
1363
{
1364
Ptr<InfEngineBackendNet> layerNet;
1365
if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end())
1366
{
1367
Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
1368
if (!node.empty())
1369
{
1370
Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
1371
CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty());
1372
layerNet = ieNode->net;
1373
}
1374
}
1375
// For an every input reference we check that it belongs to one of
1376
// the Inference Engine backend graphs. Request an output blob if it is.
1377
// Do nothing if layer's input is from the same graph.
1378
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
1379
{
1380
LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
1381
Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
1382
if (!inpNode.empty())
1383
{
1384
Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
1385
CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
1386
if (layerNet != ieInpNode->net)
1387
{
1388
// layerNet is empty or nodes are from different graphs.
1389
ieInpNode->net->addOutput(ieInpNode->layer->name);
1390
}
1391
}
1392
}
1393
}
1394
#endif // HAVE_INF_ENGINE
1395
1396
    // Creates Vulkan compute backend nodes for every layer that supports the
    // VKCOM backend. Layers without support are simply left on the default
    // path (no fusion is attempted here).
    void initVkComBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);
#ifdef HAVE_VULKAN
        // Runtime check: the build has Vulkan, but the device/loader may not.
        if (!haveVulkan())
            return;

        MapIdToLayerData::iterator it = layers.begin();
        for (; it != layers.end(); it++)
        {
            LayerData &ld = it->second;
            Ptr<Layer> layer = ld.layerInstance;
            if (!layer->supportBackend(preferableBackend))
            {
                continue;
            }

            ld.skip = false;
            ld.backendNodes[DNN_BACKEND_VKCOM] =
                layer->initVkCom(ld.inputBlobsWrappers);
        }
#endif
    }
1420
1421
    // Builds Inference Engine (OpenVINO) networks for the model. Supported
    // layers are grouped into IE sub-networks; unsupported layers split the
    // model and run on the default CPU path. Also names IE data nodes,
    // converts weights to FP16 for FP16/MYRIAD targets, and finally
    // initializes every constructed network.
    void initInfEngineBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine());
#ifdef HAVE_INF_ENGINE
        MapIdToLayerData::iterator it;
        Ptr<InfEngineBackendNet> net;

        // Pass 1: assign a name to every IE data node — network input names
        // for the data layer (id 0), the layer's own name otherwise.
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0)
            {
                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
                    dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
                }
            }
            else
            {
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
                    dataPtr->name = ld.name;
                }
            }
        }

        // Fast path: the model was imported as a ready IE network (see
        // skipInfEngineInit); just register blobs, skip all layers except the
        // last one, and initialize that single network.
        if (skipInfEngineInit)
        {
            Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
            CV_Assert(!node.empty());

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());

            for (it = layers.begin(); it != layers.end(); ++it)
            {
                LayerData &ld = it->second;
                if (ld.id == 0)
                {
                    for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
                        dataPtr->name = netInputLayer->outNames[i];
                    }
                }
                else
                {
                    for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
                        dataPtr->name = ld.name;
                    }
                }
                ieNode->net->addBlobs(ld.inputBlobsWrappers);
                ieNode->net->addBlobs(ld.outputBlobsWrappers);
                ld.skip = true;
            }
            layers[lastLayerId].skip = false;
            ieNode->net->init(preferableTarget);
            return;
        }

        // Build Inference Engine networks from sets of layers that support this
        // backend. Split a whole model on several Inference Engine networks if
        // some of layers is not implemented.

        // Set of all input and output blobs wrappers for current network.
        std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0 && ld.skip)
                continue;
            // A layer already marked skip here was fused into a previous node.
            bool fused = ld.skip;

            Ptr<Layer> layer = ld.layerInstance;
            if (!fused && !layer->supportBackend(preferableBackend))
            {
                // Unsupported layer: close the current IE sub-network and let
                // this layer run on CPU.
                addInfEngineNetOutputs(ld);
                net = Ptr<InfEngineBackendNet>();
                netBlobsWrappers.clear();
                layer->preferableTarget = DNN_TARGET_CPU;
                continue;
            }
            ld.skip = true; // Initially skip all Inference Engine supported layers.

            // Create a new network if one of inputs from different Inference Engine graph.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty())
                {
                    Ptr<InfEngineBackendNode> ieInpNode = inpNode.dynamicCast<InfEngineBackendNode>();
                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                    if (ieInpNode->net != net)
                    {
                        net = Ptr<InfEngineBackendNet>();
                        netBlobsWrappers.clear();
                        break;
                    }
                }
            }

            // The same blobs wrappers cannot be shared between two Inference Engine
            // networks because of explicit references between layers and blobs.
            // So we need to rewrap all the external blobs.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerPin inPin = ld.inputBlobsId[i];
                auto it = netBlobsWrappers.find(inPin);
                if (it == netBlobsWrappers.end())
                {
                    ld.inputBlobsWrappers[i] = InfEngineBackendWrapper::create(ld.inputBlobsWrappers[i]);
                    netBlobsWrappers[inPin] = ld.inputBlobsWrappers[i];
                }
                else
                    ld.inputBlobsWrappers[i] = it->second;
            }
            netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];

            Ptr<BackendNode> node;
            if (!net.empty())
            {
                if (fused)
                {
                    // Fused layers must be strictly in-place to reuse the
                    // producer's node and wrappers.
                    bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                                   ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
                    CV_Assert(inPlace);
                    node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                    ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
                }
            }
            else
                net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet());

            if (!fused)
            {
                node = layer->initInfEngine(ld.inputBlobsWrappers);
            }
            else if (node.empty())
                continue;

            CV_Assert(!node.empty());
            ld.backendNodes[preferableBackend] = node;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            CV_Assert(!ieNode.empty());
            ieNode->net = net;

            // FP16 conversion for FP16/MYRIAD targets: convert weights/biases
            // of weightable layers directly, otherwise convert the raw blobs.
            auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD) && !fused)
            {
                ieNode->layer->precision = InferenceEngine::Precision::FP16;
                if (weightableLayer)
                {
                    if (weightableLayer->_weights)
                        weightableLayer->_weights = convertFp16(weightableLayer->_weights);
                    if (weightableLayer->_biases)
                        weightableLayer->_biases = convertFp16(weightableLayer->_biases);
                }
                else
                {
                    for (const auto& weights : {"weights", "biases"})
                    {
                        auto it = ieNode->layer->blobs.find(weights);
                        if (it != ieNode->layer->blobs.end())
                            it->second = convertFp16(it->second);
                    }
                }
            }
            // Mirror weights/biases into the generic blobs map.
            if (weightableLayer)
            {
                if (weightableLayer->_weights)
                    weightableLayer->blobs["weights"] = weightableLayer->_weights;
                if (weightableLayer->_biases)
                    weightableLayer->blobs["biases"] = weightableLayer->_biases;
            }
            ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers);
            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);

            if (!fused)
                net->addLayer(ieNode->layer);
            addInfEngineNetOutputs(ld);
        }

        // Initialize all networks.
        std::set<InfEngineBackendNet> initializedNets;
        for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
                continue;

            Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
            if (node.empty())
                continue;

            Ptr<InfEngineBackendNode> ieNode = node.dynamicCast<InfEngineBackendNode>();
            if (ieNode.empty())
                continue;

            CV_Assert(!ieNode->net.empty());

            // The layer whose node triggers initialization of its network
            // keeps running (skip = false) so the IE graph is launched from it.
            if (!ieNode->net->isInitialized())
            {
                ieNode->net->init(preferableTarget);
                ld.skip = false;
            }
        }
#endif  // HAVE_INF_ENGINE
    }
1639
1640
    // Allocates one layer (recursively allocating its parents first):
    // binds input blobs to producers' outputs, asks the blob manager for
    // output/internal storage, wraps blobs for the chosen backend, and calls
    // the layer's finalize(). Idempotent via ld.flag.
    void allocateLayer(int lid, const LayersShapesMap& layersShapes)
    {
        CV_TRACE_FUNCTION();

        LayerData &ld = layers[lid];

        //already allocated
        if (ld.flag)
            return;

        size_t ninputs = ld.inputBlobsId.size();
#if 0
        // Debug dump of the layer's inputs (disabled).
        printf("layer %s:", ld.name.c_str());
        for (size_t i = 0; i < ninputs; i++)
        {
            int inp_lid = ld.inputBlobsId[i].lid;
            LayerData &inp_ld = layers[inp_lid];
            int inp_outputs = (int)inp_ld.outputBlobs.size();
            std::cout << " " << inp_ld.name << "(" << inp_outputs;

            for( int j = 0; j < inp_outputs; j++ )
            {
                std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
            }
            std::cout << ")";
        }
        printf("\n");
#endif

        //determine parent layers
        for (size_t i = 0; i < ninputs; i++)
            ld.inputLayersId.insert(ld.inputBlobsId[i].lid);

        //allocate parents (recursion terminates because ld.flag is set below)
        for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
            allocateLayer(*i, layersShapes);

        //bind inputs
        if (ld.id == 0) // DataLayer
        {
            // The data layer's "inputs" are the user-provided network inputs.
            ninputs = netInputLayer->inputsData.size();
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
            }
        }
        else
        {
            // Point each input at the producer layer's output blob/wrapper.
            ld.inputBlobs.resize(ninputs);
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                LayerPin from = ld.inputBlobsId[i];
                CV_Assert(from.valid());
                CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
                ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
                ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
            }
        }

        LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);

        CV_Assert(layerShapesIt != layersShapes.end());

        // Let the blob manager allocate (and possibly reuse) output/internal
        // blobs; the last argument requests FP16 storage for the OpenCL FP16 target.
        std::vector<LayerPin> pinsForInternalBlobs;
        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
                                          preferableBackend == DNN_BACKEND_OPENCV &&
                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
        for (int i = 0; i < ld.outputBlobs.size(); ++i)
        {
            ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
        }
        ld.internalBlobsWrappers.resize(ld.internals.size());
        for (int i = 0; i < ld.internals.size(); ++i)
        {
            ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
        }

        Ptr<Layer> layerPtr = ld.getLayerInstance();
        {
            // finalize() lets the layer adjust itself to the concrete input Mats.
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
            {
                inps[i] = *ld.inputBlobs[i];
            }
            layerPtr->finalize(inps, ld.outputBlobs);
            layerPtr->preferableTarget = preferableTarget;
#if 0
            // Debug dump of the layer's outputs (disabled).
            std::cout << "\toutputs:";
            size_t noutputs = ld.outputBlobs.size();
            for (size_t j = 0; j < noutputs; j++)
            {
                std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
            }
            std::cout << "\n";
#endif
        }

        // After allocation of layer, we decrease counters to it's input blobs.
        blobManager.releaseReferences(ld.inputBlobsId);
        blobManager.releaseReferences(pinsForInternalBlobs);

        ld.flag = 1;
    }
1746
1747
#if 0
1748
#define printf_(args) printf args
1749
#else
1750
#define printf_(args)
1751
#endif
1752
1753
void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
1754
{
1755
if( !fusion || preferableBackend != DNN_BACKEND_OPENCV &&
1756
preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
1757
return;
1758
1759
CV_TRACE_FUNCTION();
1760
1761
// scan through all the layers. If there is convolution layer followed by the activation layer,
1762
// we try to embed this activation into the convolution and disable separate execution of the activation
1763
std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
1764
blobsToKeep_.end());
1765
MapIdToLayerData::iterator it;
1766
for (it = layers.begin(); it != layers.end(); it++)
1767
{
1768
int lid = it->first;
1769
LayerData& ld = layers[lid];
1770
if( ld.skip )
1771
{
1772
printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1773
continue;
1774
}
1775
printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
1776
1777
// the optimization #1. try to fuse batch norm, scaling and/or activation layers
1778
// with the current layer if they follow it. Normally, the are fused with the convolution layer,
1779
// but some of them (like activation) may be fused with fully-connected, elemwise (+) and
1780
// some other layers.
1781
Ptr<Layer>& currLayer = ld.layerInstance;
1782
if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
1783
{
1784
LayerData* nextData = &layers[ld.consumers[0].lid];
1785
LayerPin lpNext(ld.consumers[0].lid, 0);
1786
while (nextData)
1787
{
1788
Ptr<Layer> nextLayer = nextData->layerInstance;
1789
if (currLayer->tryFuse(nextLayer))
1790
{
1791
printf_(("\tfused with %s\n", nextLayer->name.c_str()));
1792
nextData->skip = true;
1793
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1794
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1795
if (nextData->consumers.size() == 1)
1796
{
1797
int nextLayerId = nextData->consumers[0].lid;
1798
nextData = &layers[nextLayerId];
1799
lpNext = LayerPin(nextLayerId, 0);
1800
}
1801
else
1802
{
1803
nextData = 0;
1804
break;
1805
}
1806
}
1807
else
1808
break;
1809
}
1810
1811
if (preferableBackend != DNN_BACKEND_OPENCV)
1812
continue; // Go to the next layer.
1813
1814
// TODO: OpenCL target support more fusion styles.
1815
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
1816
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
1817
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
1818
ld.layerInstance->type != "Concat")) )
1819
continue;
1820
1821
while (nextData)
1822
{
1823
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
1824
if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
1825
nextData->type != "ReLU" &&
1826
nextData->type != "ChannelsPReLU" &&
1827
nextData->type != "ReLU6" &&
1828
nextData->type != "TanH" &&
1829
nextData->type != "Power")
1830
break;
1831
1832
Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1833
if (nextActivLayer.empty())
1834
break;
1835
1836
if (currLayer->setActivation(nextActivLayer))
1837
{
1838
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1839
nextData->skip = true;
1840
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
1841
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
1842
if (nextData->consumers.size() == 1)
1843
{
1844
int nextLayerId = nextData->consumers[0].lid;
1845
nextData = &layers[nextLayerId];
1846
lpNext = LayerPin(nextLayerId, 0);
1847
}
1848
else
1849
{
1850
nextData = 0;
1851
break;
1852
}
1853
}
1854
else
1855
break;
1856
}
1857
1858
// fuse convolution layer followed by eltwise + relu
1859
if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
1860
{
1861
Ptr<EltwiseLayer> nextEltwiseLayer;
1862
if( nextData )
1863
nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
1864
1865
if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
1866
{
1867
LayerData *eltwiseData = nextData;
1868
// go down from the second input and find the first non-skipped layer.
1869
LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
1870
CV_Assert(downLayerData);
1871
while (downLayerData->skip)
1872
{
1873
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
1874
}
1875
CV_Assert(downLayerData);
1876
1877
// second input layer is current layer.
1878
if ( ld.id == downLayerData->id )
1879
{
1880
// go down from the first input and find the first non-skipped layer
1881
downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
1882
while (downLayerData->skip)
1883
{
1884
if ( !downLayerData->type.compare("Eltwise") )
1885
downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
1886
else
1887
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
1888
}
1889
1890
Ptr<ConvolutionLayer> convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
1891
1892
// first input layer is convolution layer
1893
if( !convLayer.empty() && eltwiseData->consumers.size() == 1 )
1894
{
1895
// fuse eltwise + activation layer
1896
LayerData *firstConvLayerData = downLayerData;
1897
{
1898
nextData = &layers[eltwiseData->consumers[0].lid];
1899
lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
1900
Ptr<ActivationLayer> nextActivLayer;
1901
if( nextData )
1902
nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
1903
1904
if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
1905
(!nextData->type.compare("ReLU") ||
1906
!nextData->type.compare("ChannelsPReLU") ||
1907
!nextData->type.compare("Power")) &&
1908
currLayer->setActivation(nextActivLayer) )
1909
{
1910
CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
1911
ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
1912
printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
1913
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
1914
eltwiseData->skip = true;
1915
nextData->skip = true;
1916
// This optimization for cases like
1917
// some_layer conv
1918
// | |
1919
// +-- eltwise --+
1920
// |
1921
// activ
1922
// This way all the element-wise computations
1923
// (i.e. some_layer+conv or some_layer*conv)
1924
// would be done at [conv] layer. So we need to
1925
// replace [conv]'s output blob to [eltwise]'s one
1926
// considering that [activ] is an in-place layer.
1927
// Also we need to move all the consumers' references.
1928
// To prevent memory collisions (i.e. when input of
1929
// [conv] and output of [eltwise] is the same blob)
1930
// we allocate a new blob.
1931
CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
1932
ld.outputBlobs[0] = ld.outputBlobs[0].clone();
1933
ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
1934
1935
eltwiseData->outputBlobs = ld.outputBlobs;
1936
nextData->outputBlobs = ld.outputBlobs;
1937
eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
1938
nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
1939
1940
// Move references of [activ] layer consumers to the newly allocated blob.
1941
for (int i = 0; i < nextData->consumers.size(); ++i)
1942
{
1943
LayerData& consumer = layers[nextData->consumers[i].lid];
1944
for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
1945
{
1946
if (consumer.inputBlobsId[j].lid == lpNext.lid)
1947
{
1948
consumer.inputBlobs[j] = &ld.outputBlobs[0];
1949
consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
1950
break;
1951
}
1952
}
1953
}
1954
}
1955
}
1956
}
1957
}
1958
}
1959
}
1960
}
1961
1962
if (preferableBackend != DNN_BACKEND_OPENCV)
1963
continue; // Go to the next layer.
1964
1965
// the optimization #2. if there is no layer that takes max pooling layer's computed
1966
// max indices (and only some semantical segmentation networks might need this;
1967
// many others only take the maximum values), then we switch the max pooling
1968
// layer to the faster operating mode.
1969
Ptr<PoolingLayer> poolingLayer = ld.layerInstance.dynamicCast<PoolingLayer>();
1970
if( !poolingLayer.empty() && !ld.consumers.empty() )
1971
{
1972
size_t i = 0, nconsumers = ld.consumers.size();
1973
for( ; i < nconsumers; i++ )
1974
if( ld.consumers[i].oid > 0 )
1975
break;
1976
// if there is no layer that takes the second output pin of the pooling layer
1977
// on input then we don't need to compute the indices
1978
if( i >= nconsumers )
1979
{
1980
poolingLayer->computeMaxIdx = false;
1981
printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str()));
1982
}
1983
}
1984
1985
// the optimization #3. if there is concat layer that concatenates channels
1986
// from the inputs together (i.e. axis == 1) then we make the inputs of
1987
// the concat layer to write to the concatenation output buffer
1988
// (and so we eliminate the concatenation layer, because the channels
1989
// are concatenated implicitly).
1990
Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
1991
if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
1992
ld.outputBlobs.size() == 1 )
1993
{
1994
Mat& output = ld.outputBlobs[0];
1995
UMat umat_output;
1996
if (!ld.outputBlobsWrappers.empty() &&
1997
(preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
1998
{
1999
size_t i, ninputs = ld.inputBlobsId.size();
2000
bool conv_layer = true;
2001
for( i = 0; i < ninputs; i++ )
2002
{
2003
LayerPin pin = ld.inputBlobsId[i];
2004
LayerData* inp_i_data = &layers[pin.lid];
2005
while(inp_i_data->skip &&
2006
inp_i_data->inputBlobsId.size() == 1 &&
2007
inp_i_data->consumers.size() == 1)
2008
{
2009
pin = inp_i_data->inputBlobsId[0];
2010
inp_i_data = &layers[pin.lid];
2011
}
2012
conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
2013
}
2014
if (!conv_layer)
2015
continue;
2016
std::vector<UMat> umat_outputBlobs;
2017
umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
2018
umat_output = umat_outputBlobs[0];
2019
}
2020
2021
// TODO: in general, this optimization can always be done, but
2022
// many layers currently check that the input/output blobs are
2023
// continuous arrays. Unfortunately, this is not true when
2024
// the concatenation optimization is applied with batch_size > 1.
2025
// so, for now, we only apply this optimization in the most popular
2026
// case batch_size == 1.
2027
if( output.dims == 4 && output.size[0] == 1 )
2028
{
2029
size_t i, ninputs = ld.inputBlobsId.size();
2030
std::vector<LayerPin> realinputs(ninputs);
2031
for( i = 0; i < ninputs; i++ )
2032
{
2033
LayerPin pin = ld.inputBlobsId[i];
2034
LayerData* inp_i_data = &layers[pin.lid];
2035
while(inp_i_data->skip &&
2036
inp_i_data->inputBlobsId.size() == 1 &&
2037
inp_i_data->consumers.size() == 1)
2038
{
2039
pin = inp_i_data->inputBlobsId[0];
2040
inp_i_data = &layers[pin.lid];
2041
}
2042
printf_(("\treal input for %s is %s\n",
2043
layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
2044
inp_i_data->getLayerInstance()->name.c_str()));
2045
2046
if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
2047
break;
2048
realinputs[i] = pin;
2049
}
2050
2051
if( i >= ninputs )
2052
{
2053
// Allocate new memory to prevent collisions during memory
2054
// reusing (see https://github.com/opencv/opencv/pull/10456).
2055
output = output.clone();
2056
if (preferableBackend == DNN_BACKEND_OPENCV &&
2057
IS_DNN_OPENCL_TARGET(preferableTarget))
2058
{
2059
std::vector<UMat> umats(1);
2060
umat_output = umat_output.clone();
2061
umats[0] = umat_output;
2062
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
2063
}
2064
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
2065
int ofs = 0;
2066
for( i = 0; i < ninputs; i++ )
2067
{
2068
LayerPin pin = realinputs[i];
2069
LayerData* inp_i_data = &layers[pin.lid];
2070
int channels_i = ld.inputBlobs[i]->size[1];
2071
chrange[1] = Range(ofs, ofs + channels_i);
2072
printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
2073
pin.oid, ofs, ofs + channels_i));
2074
ofs += channels_i;
2075
Mat output_slice = output(chrange);
2076
Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
2077
CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
2078
Mat* oldPtr = &curr_output;
2079
curr_output = output_slice;
2080
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
2081
{
2082
std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
2083
umats[pin.oid] = umat_output(chrange);
2084
OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
2085
}
2086
// Layers that refer old input Mat will refer to the
2087
// new data but the same Mat object.
2088
CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
2089
}
2090
ld.skip = true;
2091
printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
2092
}
2093
}
2094
}
2095
}
2096
}
2097
2098
// Allocates memory and backend resources for every layer and then runs the
// layer-fusion pass. blobsToKeep_ lists blobs that must survive the whole
// forward pass (user-requested outputs), so the blob manager must not reuse
// their storage for other layers.
void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();

    // Reset the per-layer "allocated" flag before the new allocation pass.
    MapIdToLayerData::iterator it;
    for (it = layers.begin(); it != layers.end(); it++)
        it->second.flag = 0;

    CV_Assert(!layers[0].outputBlobs.empty());
    ShapesVec inputShapes;
    for(int i = 0; i < layers[0].outputBlobs.size(); i++)
    {
        Mat& inp = layers[0].outputBlobs[i];
        CV_Assert(inp.total());
        if (preferableBackend == DNN_BACKEND_OPENCV &&
            preferableTarget == DNN_TARGET_OPENCL_FP16)
        {
            // CV_16S is used as the storage type for FP16 data on OpenCL.
            layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
        }
        inputShapes.push_back(shape(inp));
    }
    // Infer the in/out/internal shapes of all layers from the input shapes.
    LayersShapesMap layersShapes;
    getLayersShapes(inputShapes, layersShapes);

    blobManager.reset();
    backendWrappers.clear();
    // Fake references to input blobs.
    for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
        blobManager.addReference(LayerPin(0, i));
    // Reference-count every blob by the consumers that read it.
    for (it = layers.begin(); it != layers.end(); ++it)
    {
        const LayerData& ld = it->second;
        blobManager.addReferences(ld.inputBlobsId);
    }

    // Extra references pin the requested output blobs until the very end.
    for (int i = 0; i < blobsToKeep_.size(); i++)
    {
        blobManager.addReference(blobsToKeep_[i]);
    }

    // Allocate layers in ascending id (topological) order.
    for (it = layers.begin(); it != layers.end(); it++)
    {
        int lid = it->first;
        allocateLayer(lid, layersShapes);
    }

    layersTimings.resize(lastLayerId + 1, 0);
    fuseLayers(blobsToKeep_);
}
2147
2148
// Runs inference for a single layer, dispatching to the preferable backend,
// and records its execution time in layersTimings. Skipped (fused) layers
// only get a zeroed timing entry.
void forwardLayer(LayerData &ld)
{
    CV_TRACE_FUNCTION();

    Ptr<Layer> layer = ld.layerInstance;

    TickMeter tm;
    tm.start();

    if( !ld.skip )
    {
        std::map<int, Ptr<BackendNode> >::iterator it = ld.backendNodes.find(preferableBackend);
        // Fall back to the OpenCV implementation when no backend node exists.
        if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
        {
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
            {
                // OpenCL path: run the layer on UMats taken from the wrappers.
                std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
                std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
                layer->forward(umat_inputBlobs,
                               umat_outputBlobs,
                               umat_internalBlobs);
                // Optional debug pass: scan outputs for NaN/Inf and dump all
                // blobs when a problem is found.
                if (DNN_CHECK_NAN_INF)
                {
                    bool fail = false;
                    for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                    {
                        UMat& u = umat_outputBlobs[i];
                        Mat m;
                        if (u.depth() == CV_16S) // FP16
                            convertFp16(u, m);
                        else
                            m = u.getMat(ACCESS_READ);
                        if (!checkRange(m))
                        {
                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
                        {
                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                    }
                    if (fail)
                    {
                        // Dump inputs, outputs and internals to aid debugging.
                        for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
                        {
                            UMat& u = umat_inputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                        {
                            UMat& u = umat_outputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
                        {
                            UMat& u = umat_internalBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
                        }
                        if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                            CV_Assert(!fail);
                    }
                }
                // Publish the (possibly reallocated) UMats back to the wrappers.
                OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
            }
            else
            {
                // CPU path: make sure device-side inputs are synced to host.
                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
                {
                    if (!ld.inputBlobsWrappers[i].empty())
                        ld.inputBlobsWrappers[i]->copyToHost();
                }

                std::vector<Mat> inps(ld.inputBlobs.size());
                for (int i = 0; i < ld.inputBlobs.size(); ++i)
                {
                    inps[i] = *ld.inputBlobs[i];
                }
                layer->forward(inps, ld.outputBlobs, ld.internals);

                // Same optional NaN/Inf diagnostics for the Mat path.
                if (DNN_CHECK_NAN_INF)
                {
                    bool fail = false;
                    for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                    {
                        const Mat& m = ld.outputBlobs[i];
                        if (!checkRange(m))
                        {
                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
                        {
                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                    }
                    if (fail)
                    {
                        for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
                        {
                            const Mat* pM = ld.inputBlobs[i];
                            if (!pM)
                            {
                                std::cout << "INPUT " << i << " is NULL" << std::endl;
                                continue;
                            }
                            const Mat& m = *pM;
                            std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                        {
                            const Mat& m = ld.outputBlobs[i];
                            std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < ld.internals.size(); ++i)
                        {
                            const Mat& m = ld.internals[i];
                            std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                            CV_Assert(!fail);
                    }
                }

                // Host data changed; mark device-side copies as stale.
                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
                {
                    if (!ld.outputBlobsWrappers[i].empty())
                        ld.outputBlobsWrappers[i]->setHostDirty();
                }
            }
        }
        else
        {
            // A dedicated backend node exists for this layer: dispatch to it.
            Ptr<BackendNode> node = it->second;
            CV_Assert(!node.empty());
            if (preferableBackend == DNN_BACKEND_HALIDE)
            {
                forwardHalide(ld.outputBlobsWrappers, node);
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
            {
                forwardInfEngine(node);
            }
            else if (preferableBackend == DNN_BACKEND_VKCOM)
            {
                forwardVkCom(ld.outputBlobsWrappers, node);
            }
            else
            {
                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
            }
        }
    }
    else
        tm.reset();  // fused/skipped layer: report zero time

    tm.stop();
    layersTimings[ld.id] = tm.getTimeTicks();

    ld.flag = 1;  // mark as forwarded
}
2337
2338
// Forwards all layers with id < ld.id (in id order) and then ld itself.
// When clearFlags is set, the per-layer "forwarded" flags are reset first so
// every required layer runs again.
void forwardToLayer(LayerData &ld, bool clearFlags = true)
{
    CV_TRACE_FUNCTION();

    if (clearFlags)
    {
        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
            it->second.flag = 0;
    }

    // Already was forwarded.
    if (ld.flag)
        return;

    // Forward parents. NOTE: the loop variable is named 'parentData' to fix
    // the previous shadowing of the 'ld' parameter inside the loop body.
    MapIdToLayerData::iterator it;
    for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
    {
        LayerData &parentData = it->second;
        if (parentData.flag)
            continue;
        forwardLayer(parentData);
    }

    // Forward itself.
    forwardLayer(ld);
}
2366
2367
void forwardAll()
2368
{
2369
CV_TRACE_FUNCTION();
2370
2371
MapIdToLayerData::reverse_iterator last_layer = layers.rbegin();
2372
CV_Assert(last_layer != layers.rend());
2373
forwardToLayer(last_layer->second, true);
2374
}
2375
2376
void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
2377
{
2378
std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
2379
2380
if (inOutShapes[id].in.empty())
2381
{
2382
for(int i = 0; i < inputLayerIds.size(); i++)
2383
{
2384
int layerId = inputLayerIds[i].lid;
2385
LayersShapesMap::iterator it =
2386
inOutShapes.find(layerId);
2387
if(it == inOutShapes.end() ||
2388
it->second.out.empty())
2389
{
2390
getLayerShapesRecursively(layerId, inOutShapes);
2391
}
2392
const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
2393
inOutShapes[id].in.push_back(shape);
2394
}
2395
}
2396
const ShapesVec& is = inOutShapes[id].in;
2397
ShapesVec& os = inOutShapes[id].out;
2398
ShapesVec& ints = inOutShapes[id].internal;
2399
int requiredOutputs = layers[id].requiredOutputs.size();
2400
inOutShapes[id].supportInPlace =
2401
layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints);
2402
}
2403
2404
void getLayersShapes(const ShapesVec& netInputShapes,
2405
LayersShapesMap& inOutShapes)
2406
{
2407
inOutShapes.clear();
2408
2409
inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2410
for (MapIdToLayerData::iterator it = layers.begin();
2411
it != layers.end(); it++)
2412
{
2413
getLayerShapesRecursively(it->first, inOutShapes);
2414
}
2415
}
2416
2417
void getLayerShapes(const ShapesVec& netInputShapes,
2418
const int layerId,
2419
LayerShapes& shapes)
2420
{
2421
LayersShapesMap inOutShapes;
2422
inOutShapes[0].in = netInputShapes; //insert shape for first input layer
2423
getLayerShapesRecursively(layerId, inOutShapes);
2424
shapes = inOutShapes[layerId];
2425
}
2426
2427
// Returns the pin of the deepest (largest id) layer among `pins`.
// Forwarding to that layer guarantees all the others have run too.
LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
{
    // std::max_element on an empty range returns end(); dereferencing it
    // would be undefined behavior, so fail loudly instead.
    CV_Assert(!pins.empty());
    return *std::max_element(pins.begin(), pins.end());
}
2431
2432
// Fetches the output blob addressed by `pin`, downloading it from the device
// and converting FP16 data to FP32 when necessary.
Mat getBlob(const LayerPin& pin)
{
    CV_TRACE_FUNCTION();

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob not found");

    LayerData& ld = layers[pin.lid];
    if ((size_t)pin.oid >= ld.outputBlobs.size())
    {
        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, "
                                       "the #%d was requested", ld.name.c_str(),
                                       ld.outputBlobs.size(), pin.oid));
    }
    if (preferableTarget != DNN_TARGET_CPU)
    {
        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
        // Bring the data back to the host before reading it.
        ld.outputBlobsWrappers[pin.oid]->copyToHost();
    }

    Mat& blob = ld.outputBlobs[pin.oid];
    if (blob.depth() != CV_16S)
        return blob;

    // FP16 blobs are converted into the persistent FP32 scratch buffer.
    convertFp16(blob, output_blob);
    return output_blob;
}
2461
2462
// Name-based overload: resolve the alias to a concrete pin, then fetch it.
Mat getBlob(String outputName)
{
    const LayerPin pin = getPinByAlias(outputName);
    return getBlob(pin);
}
2466
};
2467
2468
// Constructs an empty network; all state lives in the pimpl object.
Net::Net() : impl(new Net::Impl)
{
}
2471
2472
// Loads an Intel Model Optimizer IR pair (.xml topology + .bin weights) and
// wraps it as a cv::dnn::Net pinned to the Inference Engine backend.
Net Net::readFromModelOptimizer(const String& xml, const String& bin)
{
#ifndef HAVE_INF_ENGINE
    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
#else
    // Parse the IR files.
    InferenceEngine::CNNNetReader reader;
    reader.ReadNetwork(xml);
    reader.ReadWeights(bin);

    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();

    std::vector<String> inputsNames;
    for (auto& it : ieNet.getInputsInfo())
    {
        inputsNames.push_back(it.first);
    }

    Net cvNet;
    cvNet.setInputsNames(inputsNames);

    // One shared backend node wraps the whole IE network; every wrapper layer
    // created below points at it.
    Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(0));
    backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
    // Create a cv::dnn wrapper layer for each IE network output.
    for (auto& it : ieNet.getOutputsInfo())
    {
        Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
        CV_Assert(ieLayer);

        LayerParams lp;
        int lid = cvNet.addLayer(it.first, "", lp);

        LayerData& ld = cvNet.impl->layers[lid];
        cvLayer->name = it.first;
        cvLayer->type = ieLayer->type;
        ld.layerInstance = cvLayer;
        ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;

        // Connect every net input to this output layer so graph traversal works.
        for (int i = 0; i < inputsNames.size(); ++i)
            cvNet.connect(0, i, lid, i);
    }
    cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);

    // The IE network is already built; skip the usual IE initialization pass.
    cvNet.impl->skipInfEngineInit = true;
    return cvNet;
#endif  // HAVE_INF_ENGINE
}
2518
2519
// Default destruction: impl is a smart pointer, so Impl is released here.
Net::~Net()
{
}
2522
2523
int Net::addLayer(const String &name, const String &type, LayerParams &params)
2524
{
2525
CV_TRACE_FUNCTION();
2526
2527
if (impl->getLayerId(name) >= 0)
2528
{
2529
CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
2530
return -1;
2531
}
2532
2533
int id = ++impl->lastLayerId;
2534
impl->layerNameToId.insert(std::make_pair(name, id));
2535
impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
2536
2537
return id;
2538
}
2539
2540
int Net::addLayerToPrev(const String &name, const String &type, LayerParams &params)
2541
{
2542
CV_TRACE_FUNCTION();
2543
2544
int prvLid = impl->lastLayerId;
2545
int newLid = this->addLayer(name, type, params);
2546
this->connect(prvLid, 0, newLid, 0);
2547
return newLid;
2548
}
2549
2550
// Wires output #outNum of layer outLayerId into input #inpNum of inpLayerId.
void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
{
    CV_TRACE_FUNCTION();

    impl->connect(outLayerId, outNum, inpLayerId, inpNum);
}
2556
2557
// Name-based connect: pins are given as "layer.output" / "layer.input"
// aliases and resolved before wiring.
void Net::connect(String _outPin, String _inPin)
{
    CV_TRACE_FUNCTION();

    const LayerPin src = impl->getPinByAlias(_outPin);
    const LayerPin dst = impl->getPinByAlias(_inPin);

    CV_Assert(src.valid() && dst.valid());

    impl->connect(src.lid, src.oid, dst.lid, dst.oid);
}
2568
2569
// Runs the net up to `outputName` (the last layer when empty) and returns
// that layer's first output blob.
Mat Net::forward(const String& outputName)
{
    CV_TRACE_FUNCTION();

    String layerName = outputName;
    if (layerName.empty())
        layerName = getLayerNames().back();  // default: deepest layer

    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    return impl->getBlob(layerName);
}
2584
2585
// Runs the net up to `outputName` (last layer when empty) and copies that
// layer's outputs into `outputBlobs`, which may be a Mat, UMat, vector<Mat>
// or vector<UMat>. FP16 data is converted to FP32 for Mat outputs.
void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
{
    CV_TRACE_FUNCTION();

    String layerName = outputName;

    if (layerName.empty())
        layerName = getLayerNames().back();

    std::vector<LayerPin> pins(1, impl->getPinByAlias(layerName));
    impl->setUpNet(pins);
    impl->forwardToLayer(impl->getLayerData(layerName));

    LayerPin pin = impl->getPinByAlias(layerName);
    LayerData &ld = impl->layers[pin.lid];

    if (outputBlobs.isUMat())
    {
        // Single UMat destination: fetch (host) blob and copy it over.
        impl->getBlob(layerName).copyTo(outputBlobs);
    }
    else if (outputBlobs.isMat())
    {
        outputBlobs.assign(impl->getBlob(layerName));
    }
    else if (outputBlobs.isMatVector())
    {
        // All outputs of the layer are returned; sync them to host first.
        if (impl->preferableTarget != DNN_TARGET_CPU)
        {
            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
            {
                CV_Assert(!ld.outputBlobsWrappers[i].empty());
                ld.outputBlobsWrappers[i]->copyToHost();
            }
        }
        if (ld.outputBlobs[0].depth() == CV_32F)
        {
            // FP32 blobs can be shared directly.
            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
            outputvec = ld.outputBlobs;
        } else {
            // FP16 (stored as CV_16S) must be converted to FP32 copies.
            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); i++)
                convertFp16(ld.outputBlobs[i], outputvec[i]);
        }
    }
    else if (outputBlobs.isUMatVector())
    {
        std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();

        if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
            IS_DNN_OPENCL_TARGET(impl->preferableTarget))
        {
            // Device-side data can be handed out without a host round-trip.
            if (impl->preferableTarget == DNN_TARGET_OPENCL)
                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
            else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                outputvec.resize(out_vec.size());
                for (int i = 0; i < out_vec.size(); i++)
                    convertFp16(out_vec[i], outputvec[i]);
            }
        }
        else
        {
            // Non-OpenCL data: upload host Mats into the UMat destinations.
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); ++i)
                ld.outputBlobs[i].copyTo(outputvec[i]);
        }
    }
}
2655
2656
void Net::forward(OutputArrayOfArrays outputBlobs,
2657
const std::vector<String>& outBlobNames)
2658
{
2659
CV_TRACE_FUNCTION();
2660
2661
std::vector<LayerPin> pins;
2662
for (int i = 0; i < outBlobNames.size(); i++)
2663
{
2664
pins.push_back(impl->getPinByAlias(outBlobNames[i]));
2665
}
2666
2667
impl->setUpNet(pins);
2668
2669
LayerPin out = impl->getLatestLayerPin(pins);
2670
2671
impl->forwardToLayer(impl->getLayerData(out.lid));
2672
2673
std::vector<Mat> matvec;
2674
for (int i = 0; i < pins.size(); i++)
2675
{
2676
matvec.push_back(impl->getBlob(pins[i]));
2677
}
2678
2679
std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
2680
outputvec = matvec;
2681
}
2682
2683
void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
2684
const std::vector<String>& outBlobNames)
2685
{
2686
CV_TRACE_FUNCTION();
2687
2688
std::vector<LayerPin> pins;
2689
for (int i = 0; i < outBlobNames.size(); i++)
2690
{
2691
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
2692
pins.insert(pins.end(), lp.begin(), lp.end());
2693
}
2694
2695
impl->setUpNet(pins);
2696
2697
LayerPin out = impl->getLatestLayerPin(pins);
2698
2699
impl->forwardToLayer(impl->getLayerData(out.lid));
2700
2701
outputBlobs.resize(outBlobNames.size());
2702
for (int i = 0; i < outBlobNames.size(); i++)
2703
{
2704
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
2705
for (int i = 0; i < lp.size(); i++)
2706
{
2707
outputBlobs[i].push_back(impl->getBlob(lp[i]));
2708
}
2709
}
2710
}
2711
2712
void Net::setPreferableBackend(int backendId)
2713
{
2714
CV_TRACE_FUNCTION();
2715
CV_TRACE_ARG(backendId);
2716
2717
if( impl->preferableBackend != backendId )
2718
{
2719
impl->preferableBackend = backendId;
2720
impl->netWasAllocated = false;
2721
impl->clear();
2722
}
2723
}
2724
2725
// Selects the target device. OpenCL targets are silently downgraded when the
// build or the device cannot support them; any change invalidates the
// current allocation.
void Net::setPreferableTarget(int targetId)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG(targetId);

    if( impl->preferableTarget != targetId )
    {
        impl->preferableTarget = targetId;
        if (IS_DNN_OPENCL_TARGET(targetId))
        {
#ifndef HAVE_OPENCL
    // No OpenCL in this build: fall back to CPU for backends that would
    // otherwise run the OpenCV OpenCL path.
#ifdef HAVE_INF_ENGINE
            if (impl->preferableBackend == DNN_BACKEND_OPENCV)
#else
            if (impl->preferableBackend == DNN_BACKEND_DEFAULT ||
                impl->preferableBackend == DNN_BACKEND_OPENCV)
#endif  // HAVE_INF_ENGINE
                impl->preferableTarget = DNN_TARGET_CPU;
#else
            // Downgrade FP16 to plain OpenCL when the device lacks cl_khr_fp16.
            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
                impl->preferableTarget = DNN_TARGET_OPENCL;
#endif
        }
        impl->netWasAllocated = false;
        impl->clear();
    }
}
2753
2754
// Assigns names to the network inputs; names are stored on the dedicated
// input layer (id 0).
void Net::setInputsNames(const std::vector<String> &inputBlobNames)
{
    CV_TRACE_FUNCTION();

    impl->netInputLayer->setNames(inputBlobNames);
}
2760
2761
// Binds `blob` to the named network input (layer 0) together with its
// per-input preprocessing parameters (scale factor and mean subtraction).
// A shape change invalidates the current allocation.
void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    // Resolve the name to an output slot of the input layer.
    LayerPin pin;
    pin.lid = 0;
    pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");

    // Grow all per-input containers so pin.oid is addressable.
    LayerData &ld = impl->layers[pin.lid];
    const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
    ld.outputBlobs.resize(numInputs);
    ld.outputBlobsWrappers.resize(numInputs);
    impl->netInputLayer->inputsData.resize(numInputs);
    impl->netInputLayer->scaleFactors.resize(numInputs);
    impl->netInputLayer->means.resize(numInputs);

    MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
    Mat blob_ = blob.getMat();
    bool oldShape = prevShape == shape(blob_);
    if (oldShape)
    {
        // Same shape: reuse the existing storage, just copy the new data in.
        blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
    }
    else
    {
        // New shape: replace the blob; inputsData aliases the output blob.
        ld.outputBlobs[pin.oid] = blob_.clone();
        impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
    }

    // Host data changed; mark any device-side copy as stale.
    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
    }
    impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
    impl->netInputLayer->means[pin.oid] = mean;
    // A shape change requires re-allocation before the next forward().
    impl->netWasAllocated = impl->netWasAllocated && oldShape;
}
2802
2803
// Returns weight blob #numParam of the given layer.
// Raises via CV_Assert when numParam is out of range.
Mat Net::getParam(LayerId layer, int numParam)
{
    LayerData &ld = impl->getLayerData(layer);
    std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
    // Also reject negative indices: the original check only bounded above,
    // so a negative numParam passed the assert and indexed out of bounds.
    CV_Assert(numParam >= 0 && numParam < (int)layerBlobs.size());
    return layerBlobs[numParam];
}
2810
2811
// Replaces weight blob #numParam of the given layer with `blob`.
// No shape/type validation is performed — use carefully.
void Net::setParam(LayerId layer, int numParam, const Mat &blob)
{
    LayerData &ld = impl->getLayerData(layer);

    std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
    // Also reject negative indices (the original check only bounded above).
    CV_Assert(numParam >= 0 && numParam < (int)layerBlobs.size());
    //we don't make strong checks, use this function carefully
    layerBlobs[numParam] = blob;
}
2820
2821
// Resolves a layer name to its id (delegates to the impl; -1 semantics are
// those of Impl::getLayerId).
int Net::getLayerId(const String &layer)
{
    return impl->getLayerId(layer);
}
2825
2826
// Returns the layer instance for the given id/name.
// getLayerInstance() lazily constructs the layer if needed.
Ptr<Layer> Net::getLayer(LayerId layerId)
{
    return impl->getLayerData(layerId).getLayerInstance();
}
2831
2832
// Returns the instances of all layers that feed the given layer.
std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
{
    LayerData& ld = impl->getLayerData(layerId);
    if (!ld.layerInstance)
        CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));

    std::vector<Ptr<Layer> > inputLayers;
    inputLayers.reserve(ld.inputLayersId.size());
    for (std::set<int>::iterator it = ld.inputLayersId.begin();
         it != ld.inputLayersId.end(); ++it)
    {
        inputLayers.push_back(getLayer(*it));
    }
    return inputLayers;
}
2846
2847
std::vector<String> Net::getLayerNames() const
2848
{
2849
std::vector<String> res;
2850
res.reserve(impl->layers.size());
2851
2852
Impl::MapIdToLayerData::iterator it;
2853
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
2854
{
2855
if (it->second.id) //skip Data layer
2856
res.push_back(it->second.name);
2857
}
2858
2859
return res;
2860
}
2861
2862
bool Net::empty() const
2863
{
2864
return impl->layers.size() <= 1; //first layer is default Data layer
2865
}
2866
2867
std::vector<int> Net::getUnconnectedOutLayers() const
2868
{
2869
std::vector<int> layersIds;
2870
2871
Impl::MapIdToLayerData::iterator it;
2872
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
2873
{
2874
int lid = it->first;
2875
LayerData &ld = it->second;
2876
2877
if (ld.requiredOutputs.size() == 0)
2878
layersIds.push_back(lid);
2879
}
2880
2881
return layersIds;
2882
}
2883
2884
std::vector<String> Net::getUnconnectedOutLayersNames() const
2885
{
2886
std::vector<int> ids = getUnconnectedOutLayers();
2887
const size_t n = ids.size();
2888
std::vector<String> names(n);
2889
for (size_t i = 0; i < n; ++i)
2890
{
2891
names[i] = impl->layers[ids[i]].name;
2892
}
2893
return names;
2894
}
2895
2896
void Net::getLayersShapes(const ShapesVec& netInputShapes,
2897
std::vector<int>& layersIds,
2898
std::vector<ShapesVec>& inLayersShapes,
2899
std::vector<ShapesVec>& outLayersShapes) const
2900
{
2901
layersIds.clear();
2902
inLayersShapes.clear();
2903
outLayersShapes.clear();
2904
2905
Impl::LayersShapesMap inOutShapes;
2906
impl->getLayersShapes(netInputShapes, inOutShapes);
2907
2908
for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
2909
it != inOutShapes.end(); it++)
2910
{
2911
layersIds.push_back(it->first);
2912
inLayersShapes.push_back(it->second.in);
2913
outLayersShapes.push_back(it->second.out);
2914
}
2915
}
2916
2917
// Single-input convenience overload: wraps the shape in a one-element vector.
void Net::getLayersShapes(const MatShape& netInputShape,
                          std::vector<int>& layerIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    getLayersShapes(ShapesVec(1, netInputShape),
                    layerIds, inLayersShapes, outLayersShapes);
}
2925
2926
// Single-input convenience overload: wraps the shape in a one-element vector.
void Net::getLayerShapes(const MatShape& netInputShape,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    getLayerShapes(ShapesVec(1, netInputShape),
                   layerId, inLayerShapes, outLayerShapes);

}
2935
2936
// Computes the input/output shapes of a single layer given the network
// input shapes.
void Net::getLayerShapes(const ShapesVec& netInputShapes,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);
    inLayerShapes = shapes.in;
    outLayerShapes = shapes.out;
}
2946
2947
// Total FLOPs of the whole network for the given input shapes, summed over
// the per-layer estimates.
int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();

    std::vector<int> ids;
    std::vector<std::vector<MatShape> > inShapes, outShapes;
    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
    CV_Assert(inShapes.size() == outShapes.size());
    CV_Assert(inShapes.size() == ids.size());

    int64 flops = 0;
    for (size_t i = 0; i < ids.size(); i++)
        flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]);

    return flops;
}
2966
2967
// Single-input convenience overload.
int64 Net::getFLOPS(const MatShape& netInputShape) const
{
    return getFLOPS(std::vector<MatShape>(1, netInputShape));
}
2971
2972
// FLOPs estimate for a single layer, given the network input shapes.
int64 Net::getFLOPS(const int layerId,
                    const std::vector<MatShape>& netInputShapes) const
{
    Impl::MapIdToLayerData::iterator found = impl->layers.find(layerId);
    CV_Assert(found != impl->layers.end());

    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);

    return found->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
}
2983
2984
// Single-input convenience overload for the per-layer FLOPs estimate.
int64 Net::getFLOPS(const int layerId,
                    const MatShape& netInputShape) const
{
    return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
}
2989
2990
void Net::getLayerTypes(std::vector<String>& layersTypes) const
2991
{
2992
layersTypes.clear();
2993
2994
std::map<String, int> layers;
2995
for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
2996
it != impl->layers.end(); it++)
2997
{
2998
if (layers.find(it->second.type) == layers.end())
2999
layers[it->second.type] = 0;
3000
layers[it->second.type]++;
3001
}
3002
3003
for (std::map<String, int>::iterator it = layers.begin();
3004
it != layers.end(); it++)
3005
{
3006
layersTypes.push_back(it->first);
3007
}
3008
}
3009
3010
int Net::getLayersCount(const String& layerType) const
3011
{
3012
int count = 0;
3013
for (Impl::MapIdToLayerData::iterator it = impl->layers.begin();
3014
it != impl->layers.end(); it++)
3015
{
3016
if (it->second.type == layerType)
3017
count++;
3018
}
3019
return count;
3020
}
3021
3022
void Net::getMemoryConsumption(const int layerId,
3023
const std::vector<MatShape>& netInputShapes,
3024
size_t& weights, size_t& blobs) const
3025
{
3026
CV_TRACE_FUNCTION();
3027
3028
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
3029
CV_Assert(layer != impl->layers.end());
3030
3031
weights = blobs = 0;
3032
3033
for(int i = 0; i < layer->second.params.blobs.size(); i++)
3034
{
3035
const Mat& weightsBlob = layer->second.params.blobs[i];
3036
weights += weightsBlob.total()*weightsBlob.elemSize();
3037
}
3038
3039
ShapesVec inLayerShapes, outLayerShapes;
3040
getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes);
3041
for(int i = 0; i < outLayerShapes.size(); i++)
3042
{
3043
blobs += total(outLayerShapes[i]) * sizeof(float);
3044
}
3045
}
3046
3047
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3048
size_t& weights, size_t& blobs) const
3049
{
3050
CV_TRACE_FUNCTION();
3051
3052
std::vector<int> layerIds;
3053
std::vector<size_t> w, b;
3054
getMemoryConsumption(netInputShapes, layerIds, w, b);
3055
3056
weights = blobs = 0;
3057
for(int i = 0; i < layerIds.size(); i++)
3058
{
3059
weights += w[i];
3060
blobs += b[i];
3061
}
3062
}
3063
3064
// Single-input convenience overload for the per-layer memory estimate.
void Net::getMemoryConsumption(const int layerId,
                               const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}
3071
3072
void Net::getMemoryConsumption(const MatShape& netInputShape,
3073
size_t& weights, size_t& blobs) const
3074
{
3075
getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
3076
weights, blobs);
3077
}
3078
3079
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
3080
std::vector<int>& layerIds, std::vector<size_t>& weights,
3081
std::vector<size_t>& blobs) const
3082
{
3083
CV_TRACE_FUNCTION();
3084
3085
layerIds.clear();
3086
weights.clear();
3087
blobs.clear();
3088
3089
std::vector<std::vector<MatShape> > inLayerShapes, outLayerShapes;
3090
3091
getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
3092
3093
for(int i = 0; i < layerIds.size(); i++)
3094
{
3095
int w = 0, b = 0;
3096
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
3097
CV_Assert(layer != impl->layers.end());
3098
3099
for(int j = 0; j < layer->second.params.blobs.size(); j++)
3100
{
3101
const Mat& weightsBlob = layer->second.params.blobs[j];
3102
w += weightsBlob.total()*weightsBlob.elemSize();
3103
}
3104
3105
for(int j = 0; j < outLayerShapes[i].size(); j++)
3106
{
3107
b += total(outLayerShapes[i][j]) * sizeof(float);
3108
}
3109
3110
weights.push_back(w);
3111
blobs.push_back(b);
3112
}
3113
}
3114
3115
void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
3116
std::vector<size_t>& weights, std::vector<size_t>& blobs) const
3117
{
3118
getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
3119
weights, blobs);
3120
}
3121
3122
void Net::enableFusion(bool fusion)
3123
{
3124
if( impl->fusion != fusion )
3125
{
3126
impl->fusion = fusion;
3127
impl->netWasAllocated = false;
3128
impl->clear();
3129
}
3130
}
3131
3132
// Stores the path of a Halide scheduling configuration file in the
// network implementation.
// NOTE(review): presumably consulted when the Halide backend compiles the
// net — the consuming code is outside this fragment; confirm at use site.
void Net::setHalideScheduler(const String& scheduler)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());

    impl->halideConfigFile = scheduler;
}
3139
3140
// Returns per-layer timings of the last forward pass in 'timings' and the
// total as int64. Entry 0 of impl->layersTimings is skipped (per-layer
// values start at index 1).
// NOTE(review): assumes layersTimings is non-empty — confirm the impl
// always keeps at least one slot.
int64 Net::getPerfProfile(std::vector<double>& timings)
{
    timings = std::vector<double>(impl->layersTimings.begin() + 1, impl->layersTimings.end());
    // BUGFIX: the accumulator init must be a double. With an int '0' every
    // partial sum is truncated to int before the next addition, corrupting
    // the total of the double-valued timings.
    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
    return total;
}
3146
3147
//////////////////////////////////////////////////////////////////////////
3148
3149
// Layers target the CPU until a backend explicitly retargets them.
Layer::Layer()
{
    preferableTarget = DNN_TARGET_CPU;
}

Layer::Layer(const LayerParams &params)
    : blobs(params.blobs), name(params.name), type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}
3156
3157
void Layer::setParamsFrom(const LayerParams &params)
3158
{
3159
blobs = params.blobs;
3160
name = params.name;
3161
type = params.type;
3162
}
3163
3164
int Layer::inputNameToIndex(String)
3165
{
3166
return -1;
3167
}
3168
3169
int Layer::outputNameToIndex(const String&)
3170
{
3171
return 0;
3172
}
3173
3174
bool Layer::supportBackend(int backendId)
3175
{
3176
return backendId == DNN_BACKEND_OPENCV;
3177
}
3178
3179
// Backend-node factory stubs. Layers that claim support for a backend in
// supportBackend() must override the matching init*(); the base class
// always reports "not implemented".

Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();  // unreachable after CV_Error
}

Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initInfEngine(const std::vector<Ptr<BackendWrapper> > &)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type +
                                       " layers is not defined.");
    return Ptr<BackendNode>();
}
3199
3200
// Applies a generic built-in Halide schedule to the layer's top function,
// chosen from the output geometry and the target device.
// node     - Halide backend node holding the compiled funcs
// inputs   - layer inputs (unused by this default schedule)
// outputs  - outputs[0] defines the iteration domain (W, H, C, N)
// targetId - DNN_TARGET_CPU or DNN_TARGET_OPENCL; anything else errors
void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
                                 const std::vector<Mat> &outputs, int targetId) const
{
#ifdef HAVE_HALIDE
    CV_TRACE_FUNCTION();

    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
                xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    if (targetId == DNN_TARGET_CPU)
    {
        if (outW == 1 && outH == 1)
        {
            // Degenerate 1x1x1x1 output: nothing worth scheduling.
            if (outC + outN == 1)
                return;

            // 1x1 spatial output: vectorize along channels when there are
            // enough of them, otherwise just parallelize the fused domain.
            if (outC > 8)
                top.split(c, co, ci, 8)
                   .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
                   .parallel(tile)
                   .vectorize(ci, 8);
            else
                top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
                   .parallel(tile);
        }
        else
        {
            if (outH > 2)
            {
                // Parallelize over pairs of rows, unroll within a pair and
                // vectorize along x (width capped at 16 lanes).
                top.reorder(x, c, y)
                   .split(y, yo, yi, 2)
                   .fuse(yo, n, tile)
                   .parallel(tile)
                   .unroll(yi)
                   .vectorize(x, outW >= 16 ? 16 : outW);
            }
        }
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
        if (outW == 1 && outH == 1)
        {
            // 1x1 spatial output on GPU: blocks over the fused domain,
            // threads over a channel slice.
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
               .gpu_threads(ci);
        }
        else
        {
            // General GPU tiling over x/y/c.
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
            // Supported vectorization widths: 2, 3, 4, 8, 16
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
               .gpu_threads(xi, yi)
               .reorder(xi, yi, ci, xo, yo, co)
               .vectorize(ci);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");
#endif  // HAVE_HALIDE
}
3270
3271
Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
3272
{
3273
return Ptr<BackendNode>();
3274
}
3275
3276
bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
3277
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
3278
void Layer::getScaleShift(Mat& scale, Mat& shift) const
3279
{
3280
scale = Mat();
3281
shift = Mat();
3282
}
3283
3284
void Layer::unsetAttached()
3285
{
3286
setActivation(Ptr<ActivationLayer>());
3287
}
3288
3289
// Fills 'pv' with (non-const) pointers to the elements of 'v'.
// The pointers remain valid only while 'v' is alive and not reallocated.
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
    const size_t count = v.size();
    pv.resize(count);
    for (size_t i = 0; i < count; i++)
        pv[i] = const_cast<T*>(&v[i]);
}
3296
3297
void Layer::finalize(const std::vector<Mat> &inputs, std::vector<Mat> &outputs)
3298
{
3299
CV_TRACE_FUNCTION();
3300
this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
3301
}
3302
3303
void Layer::finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
3304
{
3305
CV_UNUSED(input);CV_UNUSED(output);
3306
}
3307
3308
// Extracts plain Mats from the InputArray wrappers and dispatches to the
// legacy pointer-based finalize.
void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    // The legacy interface wants pointers to the input Mats.
    std::vector<Mat*> inputPtrs;
    vecToPVec(inputs, inputPtrs);
    this->finalize(inputPtrs, outputs);
}
3319
3320
std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
3321
{
3322
CV_TRACE_FUNCTION();
3323
3324
std::vector<Mat> outputs;
3325
this->finalize(inputs, outputs);
3326
return outputs;
3327
}
3328
3329
void Layer::forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
3330
{
3331
// We kept this method for compatibility. DNN calls it now only to support users' implementations.
3332
}
3333
3334
void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
3335
{
3336
CV_TRACE_FUNCTION();
3337
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
3338
3339
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
3340
}
3341
3342
// Generic forward implementation: unwraps the InputArray containers and
// dispatches to the legacy forward(std::vector<Mat*>&, ...) interface.
// On the OpenCL FP16 target, inputs arrive as CV_16S-encoded half floats;
// this path converts them to FP32, runs the layer, and converts back.
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        // Caller-visible FP16 buffers.
        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        // FP16 -> FP32 copies of the inputs.
        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        // Fresh FP32 buffers with matching shapes for outputs/internals.
        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        // Runs the layer in FP32 via the InputArray-based forward().
        forward(inputs, outputs, internals);

        // FP32 -> FP16 back into the caller's output buffers.
        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }

    // Regular path: materialize Mats and call the pointer-based forward().
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    // Legacy interface expects pointers to the input Mats.
    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
3401
3402
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
3403
{
3404
CV_TRACE_FUNCTION();
3405
3406
this->finalize(inputs, outputs);
3407
this->forward(inputs, outputs, internals);
3408
}
3409
3410
Layer::~Layer() {}
3411
3412
bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
3413
const int requiredOutputs,
3414
std::vector<MatShape> &outputs,
3415
std::vector<MatShape> &internals) const
3416
{
3417
CV_Assert(inputs.size());
3418
outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
3419
return false;
3420
}
3421
3422
//////////////////////////////////////////////////////////////////////////
3423
3424
// Lazily creates the mutex guarding the layer-factory registry.
// Double-checked creation: the first unsynchronized read is a fast path;
// the authoritative check happens under the global initialization mutex.
// The mutex is intentionally never deleted (lives for process lifetime).
static Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}
3435
3436
// Registry type: layer type name (lowercased) -> stack of registered
// constructors. The most recently registered constructor (back()) wins.
typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;

// Holds the actual registry storage (function-local static singleton).
static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}
3443
3444
// Returns the layer registry, running initializeLayerFactory() exactly
// once on first access. Double-checked: the unsynchronized first read is
// a fast path; the real check happens under the factory mutex.
static LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            // Registers all built-in layer types into the registry.
            initializeLayerFactory();
        }
    }
    return *instance;
}
3458
3459
void LayerFactory::registerLayer(const String &type, Constructor constructor)
3460
{
3461
CV_TRACE_FUNCTION();
3462
CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3463
3464
cv::AutoLock lock(getLayerFactoryMutex());
3465
String type_ = toLowerCase(type);
3466
LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3467
3468
if (it != getLayerFactoryImpl().end())
3469
{
3470
if (it->second.back() == constructor)
3471
CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
3472
it->second.push_back(constructor);
3473
}
3474
getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
3475
}
3476
3477
void LayerFactory::unregisterLayer(const String &type)
3478
{
3479
CV_TRACE_FUNCTION();
3480
CV_TRACE_ARG_VALUE(type, "type", type.c_str());
3481
3482
cv::AutoLock lock(getLayerFactoryMutex());
3483
String type_ = toLowerCase(type);
3484
3485
LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
3486
if (it != getLayerFactoryImpl().end())
3487
{
3488
if (it->second.size() > 1)
3489
it->second.pop_back();
3490
else
3491
getLayerFactoryImpl().erase(it);
3492
}
3493
}
3494
3495
// Instantiates a layer of the given type (case-insensitive) using the
// most recently registered constructor. Returns a null Ptr for unknown
// types.
Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    const String key = toLowerCase(type);
    LayerFactory_Impl::const_iterator entry = getLayerFactoryImpl().find(key);

    if (entry == getLayerFactoryImpl().end())
        return Ptr<Layer>();  // NULL: no constructor registered

    CV_Assert(!entry->second.empty());
    return entry->second.back()(params);
}
3514
3515
BackendNode::BackendNode(int backendId) : backendId(backendId) {}
3516
3517
BackendNode::~BackendNode() {};
3518
3519
BackendWrapper::BackendWrapper(int backendId, int targetId)
3520
: backendId(backendId), targetId(targetId) {}
3521
3522
// The Mat-wrapping and reshaping constructors must be provided by every
// concrete backend; the base class only reports the missing override.
BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
             "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}
3535
3536
// Reads a network from file(s), deducing the origin framework from the
// explicit 'framework' hint or from the model/config file extensions.
// Inside each branch, (model, config) are swapped when the caller passed
// them in the opposite order, so either argument order is accepted.
Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = toLowerCase(_framework);
    String model = _model;
    String config = _config;
    // If a name has no '.', rfind returns npos and npos + 1 == 0, so the
    // "extension" degrades to the whole string.
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" ||
        modelExt == "prototxt" || configExt == "prototxt")
    {
        // readNetFromCaffe expects (prototxt, caffemodel).
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" ||
        modelExt == "pbtxt" || configExt == "pbtxt")
    {
        // readNetFromTensorflow expects (pb, pbtxt).
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" ||
        configExt == "t7" || configExt == "net")
    {
        // Torch models are a single file; accept it in either argument.
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" ||
        modelExt == "cfg" || configExt == "cfg")
    {
        // readNetFromDarknet expects (cfg, weights).
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" ||
        modelExt == "xml" || configExt == "xml")
    {
        // readNetFromModelOptimizer expects (xml, bin).
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " +
                              model + (config.empty() ? "" : ", " + config));
}
3583
3584
// In-memory variant of readNet(): chooses a loader from the explicit
// framework name alone (file extensions are unavailable for buffers).
Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
            const std::vector<uchar>& bufferConfig)
{
    const String framework = toLowerCase(_framework);
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    if (framework == "dldt")
        CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}
3600
3601
// Thin wrapper around Net::readFromModelOptimizer().
Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}
3605
3606
CV__DNN_INLINE_NS_END
3607
}} // namespace
3608
3609