// OpenCV DNN module: Darknet .cfg / .weights importer
// (modules/dnn/src/darknet/darknet_io.cpp)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                          License Agreement
//               For Open Source Computer Vision Library
//                       (3-clause BSD License)
//
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * Neither the names of the copyright holders nor the names of the contributors
//     may be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*M///////////////////////////////////////////////////////////////////////////////////////
//MIT License
//
//Copyright (c) 2017 Joseph Redmon
//
//Permission is hereby granted, free of charge, to any person obtaining a copy
//of this software and associated documentation files (the "Software"), to deal
//in the Software without restriction, including without limitation the rights
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//copies of the Software, and to permit persons to whom the Software is
//furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included in all
//copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
//SOFTWARE.
//
//M*/
#include "../precomp.hpp"

#include <fstream>
#include <iostream>
#include <sstream>

#include "darknet_io.hpp"
76
77
namespace cv {
78
namespace dnn {
79
namespace darknet {
80
81
// Looks up `param_name` in a key/value map parsed from a Darknet .cfg file
// and converts the value to T via stream extraction.
// Returns `init_val` unchanged when the key is absent (or when extraction
// leaves it untouched, e.g. an empty value).
// Note: param_name is now taken by const reference (it was passed by value,
// copying the string on every lookup).
template<typename T>
T getParam(const std::map<std::string, std::string> &params, const std::string &param_name, T init_val)
{
    std::map<std::string, std::string>::const_iterator it = params.find(param_name);
    if (it != params.end()) {
        std::stringstream ss(it->second);
        ss >> init_val;
    }
    return init_val;
}
91
92
// Name of the synthetic network-input layer; every parsed net starts from it.
static const std::string kFirstLayerName = "data";
93
94
class setLayersParams {
95
96
NetParameter *net;
97
int layer_id;
98
std::string last_layer;
99
std::vector<std::string> fused_layer_names;
100
101
public:
102
setLayersParams(NetParameter *_net) :
103
net(_net), layer_id(0), last_layer(kFirstLayerName)
104
{}
105
106
void setLayerBlobs(int i, std::vector<cv::Mat> blobs)
107
{
108
cv::dnn::LayerParams &params = net->layers[i].layerParams;
109
params.blobs = blobs;
110
}
111
112
cv::dnn::LayerParams getParamConvolution(int kernel, int pad,
113
int stride, int filters_num)
114
{
115
cv::dnn::LayerParams params;
116
params.name = "Convolution-name";
117
params.type = "Convolution";
118
119
params.set<int>("kernel_size", kernel);
120
params.set<int>("pad", pad);
121
params.set<int>("stride", stride);
122
123
params.set<bool>("bias_term", false); // true only if(BatchNorm == false)
124
params.set<int>("num_output", filters_num);
125
126
return params;
127
}
128
129
130
void setConvolution(int kernel, int pad, int stride,
131
int filters_num, int channels_num, int use_batch_normalize, int use_relu)
132
{
133
cv::dnn::LayerParams conv_param =
134
getParamConvolution(kernel, pad, stride, filters_num);
135
136
darknet::LayerParameter lp;
137
std::string layer_name = cv::format("conv_%d", layer_id);
138
139
// use BIAS in any case
140
if (!use_batch_normalize) {
141
conv_param.set<bool>("bias_term", true);
142
}
143
144
lp.layer_name = layer_name;
145
lp.layer_type = conv_param.type;
146
lp.layerParams = conv_param;
147
lp.bottom_indexes.push_back(last_layer);
148
last_layer = layer_name;
149
net->layers.push_back(lp);
150
151
if (use_batch_normalize)
152
{
153
cv::dnn::LayerParams bn_param;
154
155
bn_param.name = "BatchNorm-name";
156
bn_param.type = "BatchNorm";
157
bn_param.set<bool>("has_weight", true);
158
bn_param.set<bool>("has_bias", true);
159
bn_param.set<float>("eps", 1E-6); // .000001f in Darknet Yolo
160
161
darknet::LayerParameter lp;
162
std::string layer_name = cv::format("bn_%d", layer_id);
163
lp.layer_name = layer_name;
164
lp.layer_type = bn_param.type;
165
lp.layerParams = bn_param;
166
lp.bottom_indexes.push_back(last_layer);
167
last_layer = layer_name;
168
net->layers.push_back(lp);
169
}
170
171
if (use_relu)
172
{
173
cv::dnn::LayerParams activation_param;
174
activation_param.set<float>("negative_slope", 0.1f);
175
activation_param.name = "ReLU-name";
176
activation_param.type = "ReLU";
177
178
darknet::LayerParameter lp;
179
std::string layer_name = cv::format("relu_%d", layer_id);
180
lp.layer_name = layer_name;
181
lp.layer_type = activation_param.type;
182
lp.layerParams = activation_param;
183
lp.bottom_indexes.push_back(last_layer);
184
last_layer = layer_name;
185
net->layers.push_back(lp);
186
}
187
188
layer_id++;
189
fused_layer_names.push_back(last_layer);
190
}
191
192
void setMaxpool(size_t kernel, size_t pad, size_t stride)
193
{
194
cv::dnn::LayerParams maxpool_param;
195
maxpool_param.set<cv::String>("pool", "max");
196
maxpool_param.set<int>("kernel_size", kernel);
197
maxpool_param.set<int>("pad", pad);
198
maxpool_param.set<int>("stride", stride);
199
maxpool_param.set<cv::String>("pad_mode", "SAME");
200
maxpool_param.name = "Pooling-name";
201
maxpool_param.type = "Pooling";
202
darknet::LayerParameter lp;
203
204
std::string layer_name = cv::format("pool_%d", layer_id);
205
lp.layer_name = layer_name;
206
lp.layer_type = maxpool_param.type;
207
lp.layerParams = maxpool_param;
208
lp.bottom_indexes.push_back(last_layer);
209
last_layer = layer_name;
210
net->layers.push_back(lp);
211
layer_id++;
212
fused_layer_names.push_back(last_layer);
213
}
214
215
void setAvgpool()
216
{
217
cv::dnn::LayerParams avgpool_param;
218
avgpool_param.set<cv::String>("pool", "ave");
219
avgpool_param.set<bool>("global_pooling", true);
220
avgpool_param.name = "Pooling-name";
221
avgpool_param.type = "Pooling";
222
darknet::LayerParameter lp;
223
224
std::string layer_name = cv::format("avgpool_%d", layer_id);
225
lp.layer_name = layer_name;
226
lp.layer_type = avgpool_param.type;
227
lp.layerParams = avgpool_param;
228
lp.bottom_indexes.push_back(last_layer);
229
last_layer = layer_name;
230
net->layers.push_back(lp);
231
layer_id++;
232
fused_layer_names.push_back(last_layer);
233
}
234
235
void setSoftmax()
236
{
237
cv::dnn::LayerParams softmax_param;
238
softmax_param.name = "Softmax-name";
239
softmax_param.type = "Softmax";
240
darknet::LayerParameter lp;
241
242
std::string layer_name = cv::format("softmax_%d", layer_id);
243
lp.layer_name = layer_name;
244
lp.layer_type = softmax_param.type;
245
lp.layerParams = softmax_param;
246
lp.bottom_indexes.push_back(last_layer);
247
last_layer = layer_name;
248
net->layers.push_back(lp);
249
layer_id++;
250
fused_layer_names.push_back(last_layer);
251
}
252
253
void setConcat(int number_of_inputs, int *input_indexes)
254
{
255
cv::dnn::LayerParams concat_param;
256
concat_param.name = "Concat-name";
257
concat_param.type = "Concat";
258
concat_param.set<int>("axis", 1); // channels are in axis = 1
259
260
darknet::LayerParameter lp;
261
262
std::string layer_name = cv::format("concat_%d", layer_id);
263
lp.layer_name = layer_name;
264
lp.layer_type = concat_param.type;
265
lp.layerParams = concat_param;
266
for (int i = 0; i < number_of_inputs; ++i)
267
lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i]));
268
269
last_layer = layer_name;
270
net->layers.push_back(lp);
271
272
layer_id++;
273
fused_layer_names.push_back(last_layer);
274
}
275
276
void setIdentity(int bottom_index)
277
{
278
cv::dnn::LayerParams identity_param;
279
identity_param.name = "Identity-name";
280
identity_param.type = "Identity";
281
282
darknet::LayerParameter lp;
283
284
std::string layer_name = cv::format("identity_%d", layer_id);
285
lp.layer_name = layer_name;
286
lp.layer_type = identity_param.type;
287
lp.layerParams = identity_param;
288
lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index));
289
290
last_layer = layer_name;
291
net->layers.push_back(lp);
292
293
layer_id++;
294
fused_layer_names.push_back(last_layer);
295
}
296
297
void setReorg(int stride)
298
{
299
cv::dnn::LayerParams reorg_params;
300
reorg_params.name = "Reorg-name";
301
reorg_params.type = "Reorg";
302
reorg_params.set<int>("reorg_stride", stride);
303
304
darknet::LayerParameter lp;
305
std::string layer_name = cv::format("reorg_%d", layer_id);
306
lp.layer_name = layer_name;
307
lp.layer_type = reorg_params.type;
308
lp.layerParams = reorg_params;
309
lp.bottom_indexes.push_back(last_layer);
310
last_layer = layer_name;
311
312
net->layers.push_back(lp);
313
314
layer_id++;
315
fused_layer_names.push_back(last_layer);
316
}
317
318
void setPermute(bool isDarknetLayer = true)
319
{
320
cv::dnn::LayerParams permute_params;
321
permute_params.name = "Permute-name";
322
permute_params.type = "Permute";
323
int permute[] = { 0, 2, 3, 1 };
324
cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4);
325
326
permute_params.set("order", paramOrder);
327
328
darknet::LayerParameter lp;
329
std::string layer_name = cv::format("permute_%d", layer_id);
330
lp.layer_name = layer_name;
331
lp.layer_type = permute_params.type;
332
lp.layerParams = permute_params;
333
lp.bottom_indexes.push_back(last_layer);
334
last_layer = layer_name;
335
net->layers.push_back(lp);
336
337
if (isDarknetLayer)
338
{
339
layer_id++;
340
fused_layer_names.push_back(last_layer);
341
}
342
}
343
344
void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData)
345
{
346
cv::dnn::LayerParams region_param;
347
region_param.name = "Region-name";
348
region_param.type = "Region";
349
350
region_param.set<float>("thresh", thresh);
351
region_param.set<int>("coords", coords);
352
region_param.set<int>("classes", classes);
353
region_param.set<int>("anchors", anchors);
354
region_param.set<int>("classfix", classfix);
355
region_param.set<bool>("softmax_tree", softmax_tree);
356
region_param.set<bool>("softmax", softmax);
357
358
cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone();
359
region_param.blobs.push_back(biasData_mat);
360
361
darknet::LayerParameter lp;
362
std::string layer_name = "detection_out";
363
lp.layer_name = layer_name;
364
lp.layer_type = region_param.type;
365
lp.layerParams = region_param;
366
lp.bottom_indexes.push_back(last_layer);
367
last_layer = layer_name;
368
net->layers.push_back(lp);
369
370
layer_id++;
371
fused_layer_names.push_back(last_layer);
372
}
373
374
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors)
375
{
376
cv::dnn::LayerParams region_param;
377
region_param.name = "Region-name";
378
region_param.type = "Region";
379
380
const int numAnchors = mask.size();
381
382
region_param.set<int>("classes", classes);
383
region_param.set<int>("anchors", numAnchors);
384
region_param.set<bool>("logistic", true);
385
386
std::vector<float> usedAnchors(numAnchors * 2);
387
for (int i = 0; i < numAnchors; ++i)
388
{
389
usedAnchors[i * 2] = anchors[mask[i] * 2];
390
usedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1];
391
}
392
393
cv::Mat biasData_mat = cv::Mat(1, numAnchors * 2, CV_32F, &usedAnchors[0]).clone();
394
region_param.blobs.push_back(biasData_mat);
395
396
darknet::LayerParameter lp;
397
std::string layer_name = cv::format("yolo_%d", layer_id);
398
lp.layer_name = layer_name;
399
lp.layer_type = region_param.type;
400
lp.layerParams = region_param;
401
lp.bottom_indexes.push_back(last_layer);
402
lp.bottom_indexes.push_back(kFirstLayerName);
403
last_layer = layer_name;
404
net->layers.push_back(lp);
405
406
layer_id++;
407
fused_layer_names.push_back(last_layer);
408
}
409
410
void setShortcut(int from)
411
{
412
cv::dnn::LayerParams shortcut_param;
413
shortcut_param.name = "Shortcut-name";
414
shortcut_param.type = "Eltwise";
415
416
shortcut_param.set<std::string>("op", "sum");
417
418
darknet::LayerParameter lp;
419
std::string layer_name = cv::format("shortcut_%d", layer_id);
420
lp.layer_name = layer_name;
421
lp.layer_type = shortcut_param.type;
422
lp.layerParams = shortcut_param;
423
lp.bottom_indexes.push_back(fused_layer_names.at(from));
424
lp.bottom_indexes.push_back(last_layer);
425
last_layer = layer_name;
426
net->layers.push_back(lp);
427
428
layer_id++;
429
fused_layer_names.push_back(last_layer);
430
}
431
432
void setUpsample(int scaleFactor)
433
{
434
cv::dnn::LayerParams param;
435
param.name = "Upsample-name";
436
param.type = "Resize";
437
438
param.set<int>("zoom_factor", scaleFactor);
439
param.set<String>("interpolation", "nearest");
440
441
darknet::LayerParameter lp;
442
std::string layer_name = cv::format("upsample_%d", layer_id);
443
lp.layer_name = layer_name;
444
lp.layer_type = param.type;
445
lp.layerParams = param;
446
lp.bottom_indexes.push_back(last_layer);
447
last_layer = layer_name;
448
net->layers.push_back(lp);
449
450
layer_id++;
451
fused_layer_names.push_back(last_layer);
452
}
453
};
454
455
// Strips a .cfg line down to its significant characters: keeps only bytes in
// the range (' ', 'z'] and drops everything else (spaces, tabs, control
// characters, and anything above 'z' such as '{', '}', '~').
std::string escapeString(const std::string &src)
{
    std::string dst;
    dst.reserve(src.size());
    for (char c : src)
    {
        if (c > ' ' && c <= 'z')
            dst.push_back(c);
    }
    return dst;
}
463
464
// Parses a comma-separated list of numbers ("1,2,3" or "0.5,1.0") into a
// vector of T.  Each token is converted independently with stream extraction.
template<typename T>
std::vector<T> getNumbers(const std::string &src)
{
    std::vector<T> dst;
    std::stringstream tokens(src);
    std::string item;
    while (std::getline(tokens, item, ','))
    {
        std::stringstream converter(item);
        T value;
        converter >> value;
        dst.push_back(value);
    }
    return dst;
}
478
479
bool ReadDarknetFromCfgStream(std::istream &ifile, NetParameter *net)
480
{
481
bool read_net = false;
482
int layers_counter = -1;
483
for (std::string line; std::getline(ifile, line);) {
484
line = escapeString(line);
485
if (line.empty()) continue;
486
switch (line[0]) {
487
case '\0': break;
488
case '#': break;
489
case ';': break;
490
case '[':
491
if (line == "[net]") {
492
read_net = true;
493
}
494
else {
495
// read section
496
read_net = false;
497
++layers_counter;
498
const size_t layer_type_size = line.find("]") - 1;
499
CV_Assert(layer_type_size < line.size());
500
std::string layer_type = line.substr(1, layer_type_size);
501
net->layers_cfg[layers_counter]["type"] = layer_type;
502
}
503
break;
504
default:
505
// read entry
506
const size_t separator_index = line.find('=');
507
CV_Assert(separator_index < line.size());
508
if (separator_index != std::string::npos) {
509
std::string name = line.substr(0, separator_index);
510
std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1));
511
name = escapeString(name);
512
value = escapeString(value);
513
if (name.empty() || value.empty()) continue;
514
if (read_net)
515
net->net_cfg[name] = value;
516
else
517
net->layers_cfg[layers_counter][name] = value;
518
}
519
}
520
}
521
522
std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"];
523
std::vector<float> vec = getNumbers<float>(anchors);
524
std::map<std::string, std::string> &net_params = net->net_cfg;
525
net->width = getParam(net_params, "width", 416);
526
net->height = getParam(net_params, "height", 416);
527
net->channels = getParam(net_params, "channels", 3);
528
CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);
529
530
int current_channels = net->channels;
531
net->out_channels_vec.resize(net->layers_cfg.size());
532
533
layers_counter = -1;
534
535
setLayersParams setParams(net);
536
537
typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
538
for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
539
++layers_counter;
540
std::map<std::string, std::string> &layer_params = i->second;
541
std::string layer_type = layer_params["type"];
542
543
if (layer_type == "convolutional")
544
{
545
int kernel_size = getParam<int>(layer_params, "size", -1);
546
int pad = getParam<int>(layer_params, "pad", 0);
547
int stride = getParam<int>(layer_params, "stride", 1);
548
int filters = getParam<int>(layer_params, "filters", -1);
549
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
550
bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
551
if(activation != "linear" && activation != "leaky")
552
CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
553
int flipped = getParam<int>(layer_params, "flipped", 0);
554
if (flipped == 1)
555
CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
556
557
// correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file
558
if (kernel_size < 3) pad = 0;
559
560
CV_Assert(kernel_size > 0 && filters > 0);
561
CV_Assert(current_channels > 0);
562
563
setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
564
batch_normalize, activation == "leaky");
565
566
current_channels = filters;
567
}
568
else if (layer_type == "maxpool")
569
{
570
int kernel_size = getParam<int>(layer_params, "size", 2);
571
int stride = getParam<int>(layer_params, "stride", 2);
572
int pad = getParam<int>(layer_params, "pad", 0);
573
setParams.setMaxpool(kernel_size, pad, stride);
574
}
575
else if (layer_type == "avgpool")
576
{
577
setParams.setAvgpool();
578
}
579
else if (layer_type == "softmax")
580
{
581
int groups = getParam<int>(layer_params, "groups", 1);
582
if (groups != 1)
583
CV_Error(Error::StsNotImplemented, "Softmax from Darknet with groups != 1");
584
setParams.setSoftmax();
585
}
586
else if (layer_type == "route")
587
{
588
std::string bottom_layers = getParam<std::string>(layer_params, "layers", "");
589
CV_Assert(!bottom_layers.empty());
590
std::vector<int> layers_vec = getNumbers<int>(bottom_layers);
591
592
current_channels = 0;
593
for (size_t k = 0; k < layers_vec.size(); ++k) {
594
layers_vec[k] = layers_vec[k] > 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
595
current_channels += net->out_channels_vec[layers_vec[k]];
596
}
597
598
if (layers_vec.size() == 1)
599
setParams.setIdentity(layers_vec.at(0));
600
else
601
setParams.setConcat(layers_vec.size(), layers_vec.data());
602
}
603
else if (layer_type == "reorg")
604
{
605
int stride = getParam<int>(layer_params, "stride", 2);
606
current_channels = current_channels * (stride*stride);
607
608
setParams.setReorg(stride);
609
}
610
else if (layer_type == "region")
611
{
612
float thresh = getParam<float>(layer_params, "thresh", 0.001);
613
int coords = getParam<int>(layer_params, "coords", 4);
614
int classes = getParam<int>(layer_params, "classes", -1);
615
int num_of_anchors = getParam<int>(layer_params, "num", -1);
616
int classfix = getParam<int>(layer_params, "classfix", 0);
617
bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1);
618
bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0);
619
620
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
621
CV_Assert(!anchors_values.empty());
622
std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
623
624
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
625
626
setParams.setPermute(false);
627
setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data());
628
}
629
else if (layer_type == "shortcut")
630
{
631
std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
632
CV_Assert(!bottom_layer.empty());
633
int from = std::atoi(bottom_layer.c_str());
634
635
from += layers_counter;
636
current_channels = net->out_channels_vec[from];
637
638
setParams.setShortcut(from);
639
}
640
else if (layer_type == "upsample")
641
{
642
int scaleFactor = getParam<int>(layer_params, "stride", 1);
643
setParams.setUpsample(scaleFactor);
644
}
645
else if (layer_type == "yolo")
646
{
647
int classes = getParam<int>(layer_params, "classes", -1);
648
int num_of_anchors = getParam<int>(layer_params, "num", -1);
649
650
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
651
CV_Assert(!anchors_values.empty());
652
std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
653
654
std::string mask_values = getParam<std::string>(layer_params, "mask", std::string());
655
CV_Assert(!mask_values.empty());
656
std::vector<int> mask_vec = getNumbers<int>(mask_values);
657
658
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
659
660
setParams.setPermute(false);
661
setParams.setYolo(classes, mask_vec, anchors_vec);
662
}
663
else {
664
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
665
}
666
net->out_channels_vec[layers_counter] = current_channels;
667
}
668
669
return true;
670
}
671
672
bool ReadDarknetFromWeightsStream(std::istream &ifile, NetParameter *net)
673
{
674
int32_t major_ver, minor_ver, revision;
675
ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t));
676
ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t));
677
ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t));
678
679
uint64_t seen;
680
if ((major_ver * 10 + minor_ver) >= 2) {
681
ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t));
682
}
683
else {
684
int32_t iseen = 0;
685
ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t));
686
seen = iseen;
687
}
688
bool transpose = (major_ver > 1000) || (minor_ver > 1000);
689
if(transpose)
690
CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented");
691
692
int current_channels = net->channels;
693
int cv_layers_counter = -1;
694
int darknet_layers_counter = -1;
695
696
setLayersParams setParams(net);
697
698
typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
699
for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
700
++darknet_layers_counter;
701
++cv_layers_counter;
702
std::map<std::string, std::string> &layer_params = i->second;
703
std::string layer_type = layer_params["type"];
704
705
if (layer_type == "convolutional")
706
{
707
int kernel_size = getParam<int>(layer_params, "size", -1);
708
int filters = getParam<int>(layer_params, "filters", -1);
709
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
710
bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
711
712
CV_Assert(kernel_size > 0 && filters > 0);
713
CV_Assert(current_channels > 0);
714
715
size_t const weights_size = filters * current_channels * kernel_size * kernel_size;
716
int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size };
717
cv::Mat weightsBlob;
718
weightsBlob.create(4, sizes_weights, CV_32F);
719
CV_Assert(weightsBlob.isContinuous());
720
721
cv::Mat meanData_mat(1, filters, CV_32F); // mean
722
cv::Mat stdData_mat(1, filters, CV_32F); // variance
723
cv::Mat weightsData_mat(1, filters, CV_32F);// scale
724
cv::Mat biasData_mat(1, filters, CV_32F); // bias
725
726
ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters);
727
if (use_batch_normalize) {
728
ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters);
729
ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters);
730
ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters);
731
}
732
ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);
733
734
// set convolutional weights
735
std::vector<cv::Mat> conv_blobs;
736
conv_blobs.push_back(weightsBlob);
737
if (!use_batch_normalize) {
738
// use BIAS in any case
739
conv_blobs.push_back(biasData_mat);
740
}
741
setParams.setLayerBlobs(cv_layers_counter, conv_blobs);
742
743
// set batch normalize (mean, variance, scale, bias)
744
if (use_batch_normalize) {
745
++cv_layers_counter;
746
std::vector<cv::Mat> bn_blobs;
747
bn_blobs.push_back(meanData_mat);
748
bn_blobs.push_back(stdData_mat);
749
bn_blobs.push_back(weightsData_mat);
750
bn_blobs.push_back(biasData_mat);
751
setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
752
}
753
754
if(activation == "leaky")
755
++cv_layers_counter;
756
}
757
if (layer_type == "region" || layer_type == "yolo")
758
{
759
++cv_layers_counter; // For permute.
760
}
761
current_channels = net->out_channels_vec[darknet_layers_counter];
762
}
763
return true;
764
}
765
766
}
767
768
769
void ReadNetParamsFromCfgStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
770
{
771
if (!darknet::ReadDarknetFromCfgStream(ifile, net)) {
772
CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
773
}
774
}
775
776
void ReadNetParamsFromBinaryStreamOrDie(std::istream &ifile, darknet::NetParameter *net)
777
{
778
if (!darknet::ReadDarknetFromWeightsStream(ifile, net)) {
779
CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream");
780
}
781
}
782
}
783
}
784
785