Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/tensorflow/tf_importer.cpp
16345 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
4
5
// Copyright (C) 2016, Intel Corporation, all rights reserved.
6
// Third party copyrights are property of their respective owners.
7
8
/*
9
Implementation of Tensorflow models parser
10
*/
11
12
#include "../precomp.hpp"
13
14
#ifdef HAVE_PROTOBUF
15
#include "tf_io.hpp"
16
17
#include <iostream>
18
#include <fstream>
19
#include <algorithm>
20
#include <string>
21
#include <queue>
22
#include "tf_graph_simplifier.hpp"
23
#endif
24
25
namespace cv {
26
namespace dnn {
27
CV__DNN_INLINE_NS_BEGIN
28
29
#if HAVE_PROTOBUF
30
31
using ::google::protobuf::RepeatedField;
32
using ::google::protobuf::RepeatedPtrField;
33
using ::google::protobuf::Message;
34
using ::google::protobuf::Descriptor;
35
using ::google::protobuf::FieldDescriptor;
36
using ::google::protobuf::Reflection;
37
38
namespace
39
{
40
41
static int toNCHW(int idx)
42
{
43
CV_Assert(-4 <= idx && idx < 4);
44
if (idx == 0) return 0;
45
else if (idx > 0) return idx % 3 + 1;
46
else return (4 + idx) % 3 + 1;
47
}
48
49
// These values are used to indicate layer output's data layout where it's possible.
enum DataLayout
{
    DATA_LAYOUT_NHWC,     // TensorFlow's default layout: batch, height, width, channels
    DATA_LAYOUT_NCHW,     // OpenCV dnn's internal layout: batch, channels, height, width
    DATA_LAYOUT_UNKNOWN,  // layout could not be determined (or is ambiguous)
    DATA_LAYOUT_PLANAR    // 2-dimensional outputs (matmul, flatten, reshape to 2d)
};
57
58
// List of (layer name, node index) pairs.
typedef std::vector<std::pair<String, int> > StrIntVector;

// Reference to one output blob of a graph node, parsed from "name:blobIndex".
struct Pin
{
    Pin(const std::string &_name, int _blobIndex = 0) :
        name(_name), blobIndex(_blobIndex) {}

    // Default-constructed pin is invalid (blobIndex == -1).
    Pin() :
        name(""), blobIndex(-1) {}

    std::string name;  // node name without the ":N" suffix
    int blobIndex;     // index of the node's output blob
};
71
72
// Extracts a tensor's dimensions into @shape.
// A dimensionless tensor (scalar) is reported as the 1-element shape [1];
// a tensor without shape information raises an error.
void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)
{
    shape.clear();
    if (!tensor.has_tensor_shape())
        CV_Error(Error::StsError, "Unknown shape of input tensor");

    const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();
    const int numDims = _shape.dim_size();
    if (numDims == 0)
    {
        shape.resize(1, 1); // Scalar.
        return;
    }

    shape.resize(numDims);
    for (int d = 0; d < numDims; d++)
        shape[d] = (int)_shape.dim(d).size();
}
94
95
// Converts a TensorFlow tensor whose elements are of type T into a CV_32F blob.
// 4D tensors are reordered from TF's NHWC layout to OpenCV's NCHW layout;
// tensors of any other rank are copied element-wise without reordering.
template <typename T>
void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
{
    MatShape shape;
    blobShapeFromTensor(tensor, shape);
    int dims = (int)shape.size();

    if (dims == 4)
    {
        // REORDER blob NHWC to NCHW
        swap(shape[2], shape[3]); // NHCW
        swap(shape[1], shape[2]); // NCHW
    }

    dstBlob.create(shape, CV_32F);

    // getTensorContent() decodes the raw tensor buffer; element count must
    // match the destination blob exactly.
    Mat tensorContent = getTensorContent(tensor);
    int size = tensorContent.total();
    CV_Assert(size == (int)dstBlob.total());

    float *dstData = dstBlob.ptr<float>();
    const T *data = reinterpret_cast<const T*>(tensorContent.data);

    if (dims == 4)
    {
        // shape[] is already NCHW here, but the source buffer is still in
        // NHWC order — hence the channel-interleaved source index below.
        int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];
        int total = num*channels*height*width;
        for(int i_n = 0; i_n < shape[0]; i_n++) {
            for(int i_c = 0; i_c < shape[1]; i_c++) {
                for(int i_h = 0; i_h < shape[2]; i_h++) {
                    for(int i_w = 0; i_w < shape[3]; i_w++) {
                        int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;
                        int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;

                        CV_Assert(dst_i < total);
                        CV_Assert(src_i < total);

                        // Implicit conversion T -> float.
                        dstData[dst_i] = data[src_i];
                    }
                }
            }
        }
    } else {
        // Plain copy (with T -> float conversion) for non-4D tensors.
        for (int i = 0; i < size; i++)
            dstData[i] = data[i];
    }
}
142
143
// Converts a TensorFlow tensor into a dnn blob, dispatching on the element
// type. Half floats are handled by the float path (getTensorContent expands
// them); unsupported types raise an error.
void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
{
    const int dtype = tensor.dtype();
    if (dtype == tensorflow::DT_FLOAT || dtype == tensorflow::DT_HALF)
        parseTensor<float>(tensor, dstBlob);
    else if (dtype == tensorflow::DT_DOUBLE)
        parseTensor<double>(tensor, dstBlob);
    else
        CV_Error(Error::StsError, "Tensor's data type is not supported");
}
158
159
// Prints an attribute list's integer items as "( i0 i1 ... )" to stdout
// (debug helper).
void printList(const tensorflow::AttrValue::ListValue &val)
{
    std::cout << "(";
    const int count = val.i_size();
    for (int idx = 0; idx < count; idx++)
        std::cout << " " << val.i(idx);
    std::cout << " )";
}
166
167
// Prints a tensor shape as "[ name:size ... ]" to stdout (debug helper).
void printTensorShape(const tensorflow::TensorShapeProto &shape)
{
    std::cout << "[ ";
    const int numDims = shape.dim_size();
    for (int i = 0; i < numDims; i++)
        std::cout << shape.dim(i).name() << ":" << shape.dim(i).size() << " ";
    std::cout << "]";
}
175
176
// Prints up to the first 10 elements of a raw tensor buffer (interpreted as
// an array of T), followed by "... K more" if the buffer is longer.
template <typename T>
static void printTensorContent(const std::string& content)
{
    const T *data = reinterpret_cast<const T*>(content.c_str());
    int size = content.size() / sizeof(T);
    for (int i = 0; i < std::min(10, size); i++)
        std::cout << " " << data[i];
    if (size > 10)
        std::cout << " ... " << size - 10 << " more";
}

// Prints a tensor's shape and, for DT_FLOAT / DT_INT32 tensors, a preview of
// its data (debug helper). Tensors with empty content print only the shape.
// The previous version duplicated the preview loop for each type; it is now
// shared via printTensorContent<T>.
void printTensor(const tensorflow::TensorProto &tensor)
{
    printTensorShape(tensor.tensor_shape());

    if (tensor.tensor_content().empty())
        return;

    switch (tensor.dtype())
    {
    case tensorflow::DT_FLOAT:
        printTensorContent<float>(tensor.tensor_content());
        break;
    case tensorflow::DT_INT32:
        printTensorContent<int>(tensor.tensor_content());
        break;
    default:
        CV_Error(Error::StsError, "Tensor type is not supported");
        break;
    }
}
210
211
// Dumps a node's name, op, inputs and attributes to stdout (debug helper).
void printLayerAttr(const tensorflow::NodeDef &layer)
{
    std::cout << std::endl << layer.name() << ":" << layer.op();
    for (int ii = 0; ii < layer.input_size(); ii++)
        std::cout << "(" << layer.input(ii) << ")";
    std::cout << std::endl;
    // Take the attribute map by const reference: the previous code copied the
    // whole map (including any embedded tensors) just to iterate over it.
    const google::protobuf::Map<std::string, tensorflow::AttrValue>& attr
            = layer.attr();
    for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
         ai != attr.end(); ++ai)
    {
        std::cout << ai->first << ":";
        if (ai->first == "dtype" || ai->first == "T")
            std::cout << ai->second.i();
        else if (ai->first == "padding")
            std::cout << ai->second.s();
        else if (ai->first == "transpose_a" || ai->first == "transpose_b")
            std::cout << ai->second.b();
        //  else if (ai->first == "shape")
        //    printTensorShape(ai->second.shape());
        else if (ai->first == "strides" || ai->first == "ksize")
            printList(ai->second.list());
        else
            printTensor(ai->second.tensor());
        std::cout << std::endl;
    }
}
238
239
// Returns true if the node carries an attribute called @name.
bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
{
    // Const reference instead of a copy: the previous code duplicated the
    // entire attribute map (and any tensors inside it) per membership check.
    const google::protobuf::Map<std::string, tensorflow::AttrValue>& attr = layer.attr();
    return attr.find(name) != attr.end();
}
244
245
// Returns the node's attribute @name. The attribute must exist — check with
// hasLayerAttr() first; otherwise protobuf's Map::at terminates/throws.
const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)
{
    return layer.attr().at(name);
}
249
250
// Reads the node's optional "data_format" attribute.
// Returns DATA_LAYOUT_NHWC or DATA_LAYOUT_NCHW for recognized values,
// DATA_LAYOUT_UNKNOWN when the attribute is absent, and raises a parse error
// for anything else.
static int getDataLayout(const tensorflow::NodeDef& layer)
{
    if (hasLayerAttr(layer, "data_format"))
    {
        std::string format = getLayerAttr(layer, "data_format").s();
        if (format == "NHWC" || format == "channels_last")
            return DATA_LAYOUT_NHWC;
        if (format == "NCHW" || format == "channels_first")
            return DATA_LAYOUT_NCHW;
        CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
    }
    return DATA_LAYOUT_UNKNOWN;
}
264
265
// Strips an optional ":<outputIndex>" suffix from a tensor name,
// e.g. "conv1:0" -> "conv1". Names without a colon are returned unchanged.
static inline std::string getNodeName(const std::string& tensorName)
{
    const size_t colonPos = tensorName.rfind(':');
    return tensorName.substr(0, colonPos);
}
269
270
static inline int getDataLayout(const std::string& layerName,
271
const std::map<String, int>& data_layouts)
272
{
273
std::map<String, int>::const_iterator it = data_layouts.find(getNodeName(layerName));
274
return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
275
}
276
277
// Translates the TF "strides" attribute into stride_h/stride_w layer params.
// Only strides of the form [1, sH, sW, 1] (NHWC) or [1, 1, sH, sW] (NCHW)
// are supported: batch and channel strides must equal 1.
void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
    if (!hasLayerAttr(layer, "strides"))
        return;

    const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
    const bool isNCHW = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
    const int dimC = isNCHW ? 1 : 3;
    const int dimY = isNCHW ? 2 : 1;
    const int dimX = isNCHW ? 3 : 2;

    if (val.list().i_size() != 4 ||
        val.list().i(0) != 1 || val.list().i(dimC) != 1)
        CV_Error(Error::StsError, "Unsupported strides");

    layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
    layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
}
299
300
// Interprets a rank-1 DT_INT32 tensor (e.g. a "shape" or "block_shape" input)
// as an integer array wrapped into a DictValue.
DictValue parseDims(const tensorflow::TensorProto &tensor) {
    MatShape shape;
    blobShapeFromTensor(tensor, shape);

    // Only 1-dimensional int32 tensors are supported here.
    CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
    CV_Assert(shape.size() == 1);

    Mat values = getTensorContent(tensor);
    CV_Assert(values.type() == CV_32SC1);
    // TODO: add reordering shape if dims == 4
    return DictValue::arrayInt((int*)values.data, values.total());
}
313
314
// Translates the TF "ksize" attribute (pooling window) into kernel_h/kernel_w
// layer params. Batch and channel kernel extents must equal 1; a missing
// attribute defaults to a 1x1 kernel.
void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
    if (!hasLayerAttr(layer, "ksize"))
    {
        layerParams.set("kernel_h", 1);
        layerParams.set("kernel_w", 1);
        return;
    }

    const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
    const bool isNCHW = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
    const int dimC = isNCHW ? 1 : 3;
    const int dimY = isNCHW ? 2 : 1;
    const int dimX = isNCHW ? 3 : 2;

    if (val.list().i_size() != 4 ||
        val.list().i(0) != 1 || val.list().i(dimC) != 1)
        CV_Error(Error::StsError, "Unsupported ksize");

    layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
    layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
}
341
342
// Copies the TF "padding" attribute ("SAME"/"VALID") into the pad_mode param,
// if present.
void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
    if (!hasLayerAttr(layer, "padding"))
        return;
    layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
}
347
348
// Parses a TF tensor reference "name[:index]" into a Pin.
// Names without an explicit index refer to output blob 0.
Pin parsePin(const std::string &name)
{
    const size_t delimiterPos = name.find_first_of(":");
    if (delimiterPos == std::string::npos)
        return Pin(name);

    Pin pin(name.substr(0, delimiterPos));
    std::istringstream(name.substr(delimiterPos + 1)) >> pin.blobIndex;
    return pin;
}
361
362
// Finds all nodes that consume @layer_name as an input, optionally restricted
// to ops of kind @type. A consumer is reported once per matching input, as a
// (consumer name, node index) pair.
StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")
{
    StrIntVector consumers;

    for (int li = 0; li < net.node_size(); li++)
    {
        const tensorflow::NodeDef& node = net.node(li);
        // The op-kind filter does not depend on the input, so test it once.
        if (!type.empty() && type != node.op())
            continue;

        for (int inpId = 0; inpId < node.input_size(); inpId++)
        {
            if (parsePin(node.input(inpId)).name == layer_name)
                consumers.push_back(std::make_pair(node.name(), li));
        }
    }

    return consumers;
}
379
380
// Bypasses node @layer_index: every consumer referencing it by name is
// re-routed to the node's own input @input_blob_index. When @remove_from_net
// is true the node is also deleted from the graph.
void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {
    const String layer_name = net.node(layer_index).name();
    const String replacement = net.node(layer_index).input(input_blob_index);

    StrIntVector consumers = getNextLayers(net, layer_name);
    for (size_t ci = 0; ci < consumers.size(); ci++)
    {
        tensorflow::NodeDef* node = net.mutable_node(consumers[ci].second);
        for (int inpId = 0; inpId < node->input_size(); inpId++)
        {
            // Exact-string match (pin suffixes like ":1" are left untouched).
            if (node->input(inpId) == layer_name)
                node->set_input(inpId, replacement);
        }
    }

    if (remove_from_net)
        net.mutable_node()->DeleteSubrange(layer_index, 1);
}
401
402
// Parses a binary TensorFlow graph (optionally paired with a text graph
// definition) and converts it into a cv::dnn::Net.
class TFImporter {
public:
    // Reads the model (and optional text config) from files.
    TFImporter(const char *model, const char *config = NULL);
    // Reads the model (and optional text config) from in-memory buffers.
    TFImporter(const char *dataModel, size_t lenModel,
               const char *dataConfig = NULL, size_t lenConfig = 0);

    // Builds @dstNet from the parsed graph(s).
    void populateNet(Net dstNet);

private:
    // Converts an HWIO convolution kernel tensor into an OIHW CV_32F blob.
    void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob);

    // Connects one output pin of a registered layer to one input blob.
    void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
                 const int input_layer_id, const int input_blob_id);
    // Connects one output pin to every input blob [0, input_blobs_count).
    void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
                 const int input_layer_id, const int input_blobs_count);
    // Returns the constant tensor feeding @layer. With input_blob_index == -1
    // the single Const input is located automatically; the chosen input index
    // is reported through @actual_inp_blob_idx when the pointer is non-null.
    const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
                                                int input_blob_index = -1, int* actual_inp_blob_idx = 0);


    // Binary serialized TensorFlow graph includes weights.
    tensorflow::GraphDef netBin;
    // Optional text definition of TensorFlow graph. More flexible than binary format
    // and may be used to build the network using binary format only as a weights storage.
    // This approach is similar to Caffe's `.prototxt` and `.caffemodel`.
    tensorflow::GraphDef netTxt;

    // Names of the network inputs, in registration order.
    std::vector<String> netInputsNames;
};
430
431
// Loads the binary graph from @model and, optionally, the text graph from
// @config. Either argument may be NULL or empty; parsing failures are fatal
// (the *OrDie readers raise an error).
TFImporter::TFImporter(const char *model, const char *config)
{
    if (model && model[0])
        ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
    if (config && config[0])
        ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
}
438
439
// Same as the file-based constructor, but reads the binary graph and the
// optional text graph from in-memory buffers of the given lengths.
TFImporter::TFImporter(const char *dataModel, size_t lenModel,
                       const char *dataConfig, size_t lenConfig)
{
    if (dataModel != NULL && lenModel > 0)
        ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);
    if (dataConfig != NULL && lenConfig > 0)
        ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);
}
447
448
// Converts a convolution kernel tensor from TF's HWIO layout
// (height, width, input channels, output channels) into OpenCV's OIHW blob.
void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
{
    MatShape shape;
    blobShapeFromTensor(tensor, shape);
    int dims = (int)shape.size();

    // TODO: other blob types
    CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
              tensor.dtype() == tensorflow::DT_HALF);
    CV_Assert(dims == 4);

    // REORDER kernel HWIO to OIHW
    swap(shape[0], shape[2]); // IWHO
    swap(shape[1], shape[3]); // IOHW
    swap(shape[0], shape[1]); // OIHW

    dstBlob.create(shape, CV_32F);

    // Raw kernel data; half floats were already expanded to float by
    // getTensorContent(), so a float pointer is valid for both dtypes.
    Mat tensorContent = getTensorContent(tensor);
    int size = tensorContent.total();
    CV_Assert(size == (int)dstBlob.total());

    float *dstData = dstBlob.ptr<float>();
    const float *data = reinterpret_cast<const float*>(tensorContent.data);

    // shape[] is OIHW already, but the source buffer is still HWIO — hence
    // the transposed source index below.
    int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
    int total = out_c*input_c*height*width;
    for(int i_oc = 0; i_oc < out_c; i_oc++) {
        for(int i_ic = 0; i_ic < input_c; i_ic++) {
            for(int i_h = 0; i_h < height; i_h++) {
                for(int i_w = 0; i_w < width; i_w++) {
                    int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
                    int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
                    CV_Assert(dst_i < total);
                    CV_Assert(src_i < total);
                    dstData[dst_i] = data[src_i];
                }
            }
        }
    }
}
489
490
void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,
491
const int input_layer_id, const int input_blob_id)
492
{
493
std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);
494
if (it == layers_name_id_map.end())
495
CV_Error(Error::StsError, "Input layer not found: " + outPin.name);
496
497
std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);
498
int blobIndex;
499
if (inpNameIt == netInputsNames.end())
500
blobIndex = outPin.blobIndex;
501
else
502
blobIndex = inpNameIt - netInputsNames.begin();
503
network.connect(it->second, blobIndex, input_layer_id, input_blob_id);
504
}
505
506
void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,
507
const int input_layer_id, const int input_blobs_count)
508
{
509
for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)
510
connect(layer_id, network, outPin, input_layer_id, input_blob_id);
511
}
512
513
// Returns the constant tensor that feeds @layer.
// With @input_blob_index == -1 the layer's inputs are scanned for exactly one
// Const producer (an error is raised if several match, or none). On success
// the chosen input index is written to *actual_inp_blob_idx when provided.
// The tensor is looked up in netBin first, then in netTxt.
const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,
                                              int input_blob_index, int* actual_inp_blob_idx) {
    if (input_blob_index == -1) {
        // Auto-detect the single Const input.
        for(int i = 0; i < layer.input_size(); i++) {
            Pin input = parsePin(layer.input(i));
            if (const_layers.find(input.name) != const_layers.end()) {
                if (input_blob_index != -1)
                    CV_Error(Error::StsError, "More than one input is Const op");

                input_blob_index = i;
            }
        }
    }

    if (input_blob_index == -1)
        CV_Error(Error::StsError, "Const input blob for weights not found");

    Pin kernel_inp = parsePin(layer.input(input_blob_index));
    if (const_layers.find(kernel_inp.name) == const_layers.end())
        CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) +
                                  "] for node [" + layer.name() + "] not found");
    // Only the first output blob of a Const node can hold the tensor.
    if (kernel_inp.blobIndex != 0)
        CV_Error(Error::StsError, "Unsupported kernel input");

    if(actual_inp_blob_idx) {
        *actual_inp_blob_idx = input_blob_index;
    }

    // The stored index may refer to a node of either graph: prefer the binary
    // graph (the weights storage), then fall back to the text graph. The name
    // check guards against index collisions between the two graphs.
    int nodeIdx = const_layers.at(kernel_inp.name);
    if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
    {
        return netBin.node(nodeIdx).attr().at("value").tensor();
    }
    else
    {
        CV_Assert_N(nodeIdx < netTxt.node_size(),
                    netTxt.node(nodeIdx).name() == kernel_inp.name);
        return netTxt.node(nodeIdx).attr().at("value").tensor();
    }
}
553
554
// Registers every Const node of @net in @const_layers (name -> node index)
// and adds it to @layers_to_ignore so it is not imported as a regular layer.
// "Dequantize" nodes in MIN_FIRST mode are folded: the referenced DT_QUINT8
// tensor is converted to DT_FLOAT in place and re-registered under the
// Dequantize node's name.
static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,
                          std::set<String>& layers_to_ignore)
{
    for (int li = 0; li < net.node_size(); li++)
    {
        const tensorflow::NodeDef &layer = net.node(li);
        String name = layer.name();
        String type = layer.op();

        if (type == "Dequantize")
        {
            // Example of Dequantize node:
            //   name: "conv2d_1/bias"
            //   op: "Dequantize"
            //   input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)
            //   input: "conv2d_1/bias_quantized_min"
            //   input: "conv2d_1/bias_quantized_max"
            //   attr { key: "T" value { type: DT_QUINT8 } }   (quantized type)
            //   attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique)
            CV_Assert(layer.input_size() == 3);
            // All three inputs must already be registered constants
            // (graph nodes are assumed to be topologically ordered).
            for (int i = 0; i < 3; ++i)
                CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());
            CV_Assert(hasLayerAttr(layer, "mode") &&
                      getLayerAttr(layer, "mode").s() == "MIN_FIRST");

            int tensorId = const_layers[layer.input(0)];
            int minId = const_layers[layer.input(1)];
            int maxId = const_layers[layer.input(2)];

            tensorflow::TensorProto* tensor = net.mutable_node(tensorId)
                                                ->mutable_attr()->at("value")
                                                 .mutable_tensor();
            CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);

            Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());
            Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());
            CV_Assert_N(qMin.total() == 1, qMin.type() == CV_32FC1,
                        qMax.total() == 1, qMax.type() == CV_32FC1);

            Mat content = getTensorContent(*tensor);

            // MIN_FIRST dequantization:
            //   value = rangeScale * q + rangeScale * round(minVal / rangeScale)
            float minVal = qMin.at<float>(0);
            float rangeScale = (qMax.at<float>(0) - minVal) / 255;
            CV_Assert(rangeScale >= 0);
            content.convertTo(content, CV_32FC1, rangeScale,
                              rangeScale * cvRound(minVal / rangeScale));

            tensor->set_dtype(tensorflow::DT_FLOAT);
            tensor->set_tensor_content(content.data, content.total() * content.elemSize1());

            // Re-register the (now dequantized) source Const node under the
            // Dequantize node's name, so consumers find it directly.
            net.mutable_node(tensorId)->set_name(name);
            CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
            layers_to_ignore.insert(name);
            continue;
        }
        else if (type != "Const")
            continue;  // only Const parameters are supported

        if (layer.attr().find("value") != layer.attr().end())
        {
            // Duplicate node names are not allowed: the insertion must succeed.
            CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
        }
        layers_to_ignore.insert(name);
    }
}
619
620
// If all inputs of specific layer have the same data layout we can say that
// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
static int predictOutputDataLayout(const tensorflow::GraphDef& net,
                                   const tensorflow::NodeDef& layer,
                                   const std::map<String, int>& data_layouts)
{
    // NOTE(review): @net is unused in this body — presumably kept for
    // interface symmetry with other graph helpers.
    // An explicit "data_format" attribute on the node wins over inference.
    int layout = getDataLayout(layer);
    if (layout != DATA_LAYOUT_UNKNOWN)
        return layout;

    // Determine layout by layer's inputs
    std::map<String, int>::const_iterator it;
    for (int i = 0, n = layer.input_size(); i < n; ++i)
    {
        it = data_layouts.find(getNodeName(layer.input(i)));
        if (it != data_layouts.end())
        {
            if (layout != DATA_LAYOUT_UNKNOWN)
            {
                // Two inputs with conflicting known layouts: give up.
                if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
                    return DATA_LAYOUT_UNKNOWN;
            }
            else
                layout = it->second;
        }
    }

    if (layout != DATA_LAYOUT_UNKNOWN)
        return layout;

    // Determine layout by layer's consumers recursively.
    // (populateNet pre-fills data_layouts from consumers, so the layer's own
    // entry is expected to exist here.)
    it = data_layouts.find(layer.name());
    CV_Assert(it != data_layouts.end());
    return it->second;
}
655
656
void TFImporter::populateNet(Net dstNet)
657
{
658
RemoveIdentityOps(netBin);
659
RemoveIdentityOps(netTxt);
660
661
if (!netTxt.ByteSize())
662
simplifySubgraphs(netBin);
663
664
std::set<String> layers_to_ignore;
665
666
tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
667
668
int layersSize = net.node_size();
669
670
std::map<String, int> data_layouts;
671
// Pre-fill data layouts where they are set explicitly.
672
// Assuming that nodes are in topological order
673
for (int i = net.node_size() - 1; i >= 0; --i)
674
{
675
const tensorflow::NodeDef& layer = net.node(i);
676
std::string name = layer.name();
677
678
int layout = getDataLayout(layer);
679
std::map<String, int>::iterator it = data_layouts.find(name);
680
if (it != data_layouts.end())
681
{
682
if (layout != DATA_LAYOUT_UNKNOWN)
683
{
684
if (it->second == DATA_LAYOUT_UNKNOWN)
685
it->second = layout;
686
else if (it->second != layout)
687
{
688
it->second = DATA_LAYOUT_UNKNOWN;
689
layout = DATA_LAYOUT_UNKNOWN;
690
}
691
}
692
else
693
layout = it->second;
694
}
695
else
696
data_layouts[name] = layout;
697
698
// Specify input layers to have the same data layout.
699
for (int j = 0; j < layer.input_size(); ++j)
700
{
701
name = getNodeName(layer.input(j));
702
it = data_layouts.find(name);
703
if (it != data_layouts.end())
704
{
705
if (layout != DATA_LAYOUT_UNKNOWN)
706
{
707
if (it->second == DATA_LAYOUT_UNKNOWN)
708
it->second = layout;
709
else if (it->second != layout)
710
it->second = DATA_LAYOUT_UNKNOWN;
711
}
712
}
713
else
714
data_layouts[name] = layout;
715
}
716
}
717
718
// find all Const layers for params
719
std::map<String, int> value_id;
720
// A map with constant blobs which are shared between multiple layers.
721
std::map<String, Mat> sharedWeights;
722
addConstNodes(netBin, value_id, layers_to_ignore);
723
addConstNodes(netTxt, value_id, layers_to_ignore);
724
725
std::map<String, int> layer_id;
726
727
for (int li = 0; li < layersSize; li++)
728
{
729
tensorflow::NodeDef layer = net.node(li);
730
String name = layer.name();
731
String type = layer.op();
732
LayerParams layerParams;
733
734
if(layers_to_ignore.find(name) != layers_to_ignore.end())
735
continue;
736
737
int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
738
data_layouts[name] = predictedLayout;
739
740
if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad")
741
{
742
// The first node of dilated convolution subgraph.
743
// Extract input node, dilation rate and paddings.
744
std::string input = layer.input(0);
745
StrIntVector next_layers;
746
if (type == "SpaceToBatchND" || type == "Pad")
747
{
748
next_layers = getNextLayers(net, name, "Conv2D");
749
if (next_layers.empty())
750
next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
751
}
752
if (type == "SpaceToBatchND")
753
{
754
// op: "SpaceToBatchND"
755
// input: "input"
756
// input: "SpaceToBatchND/block_shape"
757
// input: "SpaceToBatchND/paddings"
758
CV_Assert(layer.input_size() == 3);
759
760
DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
761
CV_Assert(dilation.size() == 2);
762
layerParams.set("dilation_h", dilation.get<int>(0));
763
layerParams.set("dilation_w", dilation.get<int>(1));
764
765
Mat paddings;
766
parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
767
768
// paddings is a 2x2 matrix: [[top, bot], [left, right]]
769
layerParams.set("pad_h", paddings.at<float>(0));
770
layerParams.set("pad_w", paddings.at<float>(2));
771
772
CV_Assert(next_layers.size() == 1);
773
layer = net.node(next_layers[0].second);
774
layers_to_ignore.insert(next_layers[0].first);
775
name = layer.name();
776
type = layer.op();
777
}
778
else if (type == "Pad")
779
{
780
Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
781
CV_Assert(paddings.type() == CV_32SC1);
782
if (paddings.total() == 8)
783
{
784
// Perhabs, we have NHWC padding dimensions order.
785
// N H W C
786
// 0 1 2 3 4 5 6 7
787
std::swap(paddings.at<int32_t>(2), paddings.at<int32_t>(6));
788
std::swap(paddings.at<int32_t>(3), paddings.at<int32_t>(7));
789
// N C W H
790
// 0 1 2 3 4 5 6 7
791
std::swap(paddings.at<int32_t>(4), paddings.at<int32_t>(6));
792
std::swap(paddings.at<int32_t>(5), paddings.at<int32_t>(7));
793
// N C H W
794
// 0 1 2 3 4 5 6 7
795
}
796
if (next_layers.empty() || paddings.total() != 8 ||
797
paddings.at<int32_t>(4) != paddings.at<int32_t>(5) ||
798
paddings.at<int32_t>(6) != paddings.at<int32_t>(7))
799
{
800
// Just a single padding layer.
801
layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
802
803
int id = dstNet.addLayer(name, "Padding", layerParams);
804
layer_id[name] = id;
805
806
connect(layer_id, dstNet, parsePin(input), id, 0);
807
continue;
808
}
809
else
810
{
811
// Merge with subsequent convolutional layer.
812
CV_Assert(next_layers.size() == 1);
813
814
layerParams.set("pad_h", paddings.at<int32_t>(4));
815
layerParams.set("pad_w", paddings.at<int32_t>(6));
816
817
layer = net.node(next_layers[0].second);
818
layers_to_ignore.insert(next_layers[0].first);
819
name = layer.name();
820
type = layer.op();
821
}
822
}
823
824
// For the object detection networks, TensorFlow Object Detection API
825
// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
826
// order. We can manage it at DetectionOutput layer parsing predictions
827
// or shuffle last convolution's weights.
828
bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
829
getLayerAttr(layer, "loc_pred_transposed").b();
830
831
layerParams.set("bias_term", false);
832
layerParams.blobs.resize(1);
833
834
next_layers = getNextLayers(net, name, "BiasAdd");
835
if (next_layers.size() == 1) {
836
layerParams.set("bias_term", true);
837
layerParams.blobs.resize(2);
838
839
int weights_layer_index = next_layers[0].second;
840
841
blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
842
ExcludeLayer(net, weights_layer_index, 0, false);
843
layers_to_ignore.insert(next_layers[0].first);
844
845
// Shuffle bias from yxYX to xyXY.
846
if (locPredTransposed)
847
{
848
const int numWeights = layerParams.blobs[1].total();
849
float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
850
CV_Assert(numWeights % 4 == 0);
851
for (int i = 0; i < numWeights; i += 2)
852
{
853
std::swap(biasData[i], biasData[i + 1]);
854
}
855
}
856
}
857
858
int kernelTensorInpId = -1;
859
const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId);
860
const String kernelTensorName = layer.input(kernelTensorInpId);
861
std::map<String, Mat>::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName);
862
if (sharedWeightsIt == sharedWeights.end())
863
{
864
kernelFromTensor(kernelTensor, layerParams.blobs[0]);
865
releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
866
867
int* kshape = layerParams.blobs[0].size.p;
868
const int outCh = kshape[0];
869
const int inCh = kshape[1];
870
const int height = kshape[2];
871
const int width = kshape[3];
872
if (type == "DepthwiseConv2dNative")
873
{
874
CV_Assert(!locPredTransposed);
875
const int chMultiplier = kshape[0];
876
877
Mat copy = layerParams.blobs[0].clone();
878
float* src = (float*)copy.data;
879
float* dst = (float*)layerParams.blobs[0].data;
880
for (int i = 0; i < chMultiplier; ++i)
881
for (int j = 0; j < inCh; ++j)
882
for (int s = 0; s < height * width; ++s)
883
{
884
int src_i = (i * inCh + j) * height * width + s;
885
int dst_i = (j * chMultiplier + i) * height* width + s;
886
dst[dst_i] = src[src_i];
887
}
888
// TODO Use reshape instead
889
kshape[0] = inCh * chMultiplier;
890
kshape[1] = 1;
891
size_t* kstep = layerParams.blobs[0].step.p;
892
kstep[0] = kstep[1]; // fix steps too
893
}
894
895
// Shuffle output channels from yxYX to xyXY.
896
if (locPredTransposed)
897
{
898
const int slice = height * width * inCh;
899
for (int i = 0; i < outCh; i += 2)
900
{
901
cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));
902
cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));
903
std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
904
}
905
}
906
sharedWeights[kernelTensorName] = layerParams.blobs[0];
907
}
908
else
909
{
910
layerParams.blobs[0] = sharedWeightsIt->second;
911
}
912
913
layerParams.set("kernel_h", layerParams.blobs[0].size[2]);
914
layerParams.set("kernel_w", layerParams.blobs[0].size[3]);
915
layerParams.set("num_output", layerParams.blobs[0].size[0]);
916
917
setStrides(layerParams, layer);
918
if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
919
setPadding(layerParams, layer);
920
921
// The final node of dilated convolution subgraph.
922
next_layers = getNextLayers(net, name, "BatchToSpaceND");
923
if (!next_layers.empty())
924
{
925
CV_Assert(next_layers.size() == 1);
926
ExcludeLayer(net, next_layers[0].second, 0, false);
927
layers_to_ignore.insert(next_layers[0].first);
928
}
929
930
int id = dstNet.addLayer(name, "Convolution", layerParams);
931
layer_id[name] = id;
932
933
// one input only
934
connect(layer_id, dstNet, parsePin(input), id, 0);
935
936
937
if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
938
data_layouts[name] = DATA_LAYOUT_NHWC;
939
}
940
else if (type == "BiasAdd" || type == "Add")
941
{
942
bool haveConst = false;
943
for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
944
{
945
Pin input = parsePin(layer.input(ii));
946
haveConst = value_id.find(input.name) != value_id.end();
947
}
948
CV_Assert(!haveConst || layer.input_size() == 2);
949
950
if (haveConst)
951
{
952
Mat values = getTensorContent(getConstBlob(layer, value_id));
953
CV_Assert(values.type() == CV_32FC1);
954
955
int id;
956
if (values.total() == 1) // is a scalar.
957
{
958
layerParams.set("shift", values.at<float>(0));
959
id = dstNet.addLayer(name, "Power", layerParams);
960
}
961
else // is a vector
962
{
963
layerParams.blobs.resize(1, values);
964
id = dstNet.addLayer(name, "Shift", layerParams);
965
}
966
layer_id[name] = id;
967
968
// one input only
969
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
970
}
971
else
972
{
973
layerParams.set("operation", "sum");
974
int id = dstNet.addLayer(name, "Eltwise", layerParams);
975
layer_id[name] = id;
976
977
for (int ii = 0; ii < layer.input_size(); ii++)
978
{
979
Pin inp = parsePin(layer.input(ii));
980
if (layer_id.find(inp.name) == layer_id.end())
981
CV_Error(Error::StsError, "Input layer not found: " + inp.name);
982
connect(layer_id, dstNet, inp, id, ii);
983
}
984
}
985
}
986
else if (type == "Sub")
987
{
988
bool haveConst = false;
989
for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
990
{
991
Pin input = parsePin(layer.input(ii));
992
haveConst = value_id.find(input.name) != value_id.end();
993
}
994
CV_Assert(haveConst);
995
996
Mat values = getTensorContent(getConstBlob(layer, value_id));
997
CV_Assert(values.type() == CV_32FC1);
998
values *= -1.0f;
999
1000
int id;
1001
if (values.total() == 1) // is a scalar.
1002
{
1003
layerParams.set("shift", values.at<float>(0));
1004
id = dstNet.addLayer(name, "Power", layerParams);
1005
}
1006
else // is a vector
1007
{
1008
layerParams.blobs.resize(1, values);
1009
id = dstNet.addLayer(name, "Shift", layerParams);
1010
}
1011
layer_id[name] = id;
1012
1013
// one input only
1014
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1015
}
1016
else if (type == "MatMul")
1017
{
1018
CV_Assert(layer.input_size() == 2);
1019
1020
// For the object detection networks, TensorFlow Object Detection API
1021
// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
1022
// order. We can manage it at DetectionOutput layer parsing predictions
1023
// or shuffle last Faster-RCNN's matmul weights.
1024
bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
1025
getLayerAttr(layer, "loc_pred_transposed").b();
1026
1027
layerParams.set("bias_term", false);
1028
layerParams.blobs.resize(1);
1029
1030
StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1031
if (next_layers.empty())
1032
{
1033
next_layers = getNextLayers(net, name, "Add");
1034
}
1035
if (next_layers.size() == 1) {
1036
layerParams.set("bias_term", true);
1037
layerParams.blobs.resize(2);
1038
1039
int weights_layer_index = next_layers[0].second;
1040
blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1041
ExcludeLayer(net, weights_layer_index, 0, false);
1042
layers_to_ignore.insert(next_layers[0].first);
1043
1044
if (locPredTransposed)
1045
{
1046
const int numWeights = layerParams.blobs[1].total();
1047
float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
1048
CV_Assert(numWeights % 4 == 0);
1049
for (int i = 0; i < numWeights; i += 2)
1050
{
1051
std::swap(biasData[i], biasData[i + 1]);
1052
}
1053
}
1054
}
1055
1056
int kernel_blob_index = -1;
1057
const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);
1058
blobFromTensor(kernelTensor, layerParams.blobs[0]);
1059
releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
1060
1061
if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed
1062
Mat data = layerParams.blobs[0].t();
1063
layerParams.blobs[0] = data.clone();
1064
}
1065
1066
layerParams.set("num_output", layerParams.blobs[0].size[0]);
1067
if (locPredTransposed)
1068
{
1069
CV_Assert(layerParams.blobs[0].dims == 2);
1070
for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2)
1071
{
1072
cv::Mat src = layerParams.blobs[0].row(i);
1073
cv::Mat dst = layerParams.blobs[0].row(i + 1);
1074
std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
1075
}
1076
}
1077
1078
int id = dstNet.addLayer(name, "InnerProduct", layerParams);
1079
layer_id[name] = id;
1080
1081
// one input only
1082
int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
1083
connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
1084
data_layouts[name] = DATA_LAYOUT_PLANAR;
1085
}
1086
else if (type == "Reshape")
1087
{
1088
Pin inpId = parsePin(layer.input(0));
1089
int inpLayout = getDataLayout(layer.input(0), data_layouts);
1090
// There are two possible implementations: reshape an input using
1091
// predefined sizes or use a second input blob as a source of new shape.
1092
if (value_id.find(layer.input(1)) != value_id.end())
1093
{
1094
Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));
1095
1096
if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC)
1097
{
1098
LayerParams permLP;
1099
int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
1100
permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1101
1102
std::string permName = name + "/nchw";
1103
CV_Assert(layer_id.find(permName) == layer_id.end());
1104
int permId = dstNet.addLayer(permName, "Permute", permLP);
1105
layer_id[permName] = permId;
1106
connect(layer_id, dstNet, inpId, permId, 0);
1107
inpId = Pin(permName);
1108
inpLayout = DATA_LAYOUT_NCHW;
1109
}
1110
else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC)
1111
{
1112
// NHWC->NCHW
1113
std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));
1114
std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));
1115
}
1116
layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));
1117
1118
int id = dstNet.addLayer(name, "Reshape", layerParams);
1119
layer_id[name] = id;
1120
1121
// one input only
1122
connect(layer_id, dstNet, inpId, id, 0);
1123
data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : inpLayout;
1124
}
1125
else
1126
{
1127
int id = dstNet.addLayer(name, "Reshape", layerParams);
1128
layer_id[name] = id;
1129
connect(layer_id, dstNet, inpId, id, 0);
1130
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1131
data_layouts[name] = inpLayout;
1132
}
1133
}
1134
else if (type == "Flatten" || type == "Squeeze")
1135
{
1136
Pin inpId = parsePin(layer.input(0));
1137
int inpLayout = getDataLayout(layer.input(0), data_layouts);
1138
if (type == "Squeeze")
1139
{
1140
CV_Assert(hasLayerAttr(layer, "squeeze_dims"));
1141
const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");
1142
if (inpLayout == DATA_LAYOUT_NHWC)
1143
{
1144
if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)
1145
CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1146
}
1147
else if (inpLayout == DATA_LAYOUT_NCHW)
1148
{
1149
if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)
1150
CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1151
}
1152
else
1153
CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");
1154
}
1155
if (inpLayout == DATA_LAYOUT_NHWC)
1156
{
1157
LayerParams permLP;
1158
int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
1159
permLP.set("order", DictValue::arrayInt<int*>(order, 4));
1160
1161
std::string permName = name + "/nchw";
1162
CV_Assert(layer_id.find(permName) == layer_id.end());
1163
int permId = dstNet.addLayer(permName, "Permute", permLP);
1164
layer_id[permName] = permId;
1165
connect(layer_id, dstNet, inpId, permId, 0);
1166
inpId = Pin(permName);
1167
}
1168
int id = dstNet.addLayer(name, "Flatten", layerParams);
1169
layer_id[name] = id;
1170
connect(layer_id, dstNet, inpId, id, 0);
1171
data_layouts[name] = DATA_LAYOUT_PLANAR;
1172
}
1173
else if (type == "Transpose")
1174
{
1175
Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
1176
CV_Assert(perm.type() == CV_32SC1);
1177
int* permData = (int*)perm.data;
1178
if (perm.total() == 4)
1179
{
1180
// Only NHWC <-> NCHW permutations are allowed. OpenCV is always
1181
// keep NCHW layout this way.
1182
int inpLayout = getDataLayout(layer.input(0), data_layouts);
1183
if (inpLayout == DATA_LAYOUT_NHWC)
1184
{
1185
if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
1186
{
1187
// in TensorFlow: NHWC->NCHW
1188
// in OpenCV: NCHW->NCHW
1189
data_layouts[name] = DATA_LAYOUT_NCHW;
1190
}
1191
else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1192
{
1193
// in TensorFlow: NHWC->NHWC
1194
// in OpenCV: NCHW->NCHW
1195
data_layouts[name] = DATA_LAYOUT_NHWC;
1196
}
1197
else
1198
CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1199
}
1200
else if (inpLayout == DATA_LAYOUT_NCHW)
1201
{
1202
if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
1203
{
1204
// in TensorFlow: NCHW->NHWC
1205
// in OpenCV: NCHW->NCHW
1206
data_layouts[name] = DATA_LAYOUT_NHWC;
1207
}
1208
else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
1209
{
1210
// in TensorFlow: NCHW->NCHW
1211
// in OpenCV: NCHW->NCHW
1212
data_layouts[name] = DATA_LAYOUT_NCHW;
1213
}
1214
else
1215
CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
1216
}
1217
int id = dstNet.addLayer(name, "Identity", layerParams);
1218
layer_id[name] = id;
1219
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1220
}
1221
else
1222
{
1223
layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
1224
1225
int id = dstNet.addLayer(name, "Permute", layerParams);
1226
layer_id[name] = id;
1227
1228
// one input only
1229
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1230
data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1231
}
1232
}
1233
else if (type == "Const")
1234
{
1235
}
1236
else if (type == "LRN")
1237
{
1238
if(hasLayerAttr(layer, "alpha")) {
1239
layerParams.set("alpha", getLayerAttr(layer, "alpha").f());
1240
}
1241
if(hasLayerAttr(layer, "beta")) {
1242
layerParams.set("beta", getLayerAttr(layer, "beta").f());
1243
}
1244
if(hasLayerAttr(layer, "depth_radius")) {
1245
int radius = (int)getLayerAttr(layer, "depth_radius").i();
1246
layerParams.set("local_size", 2*radius + 1);
1247
}
1248
if(hasLayerAttr(layer, "bias")) {
1249
layerParams.set("bias", getLayerAttr(layer, "bias").f());
1250
}
1251
layerParams.set("norm_by_size", false);
1252
1253
int id = dstNet.addLayer(name, "LRN", layerParams);
1254
layer_id[name] = id;
1255
1256
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1257
}
1258
else if (type == "Concat" || type == "ConcatV2")
1259
{
1260
int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
1261
int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
1262
1263
if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1264
axis = toNCHW(axis);
1265
layerParams.set("axis", axis);
1266
1267
int id = dstNet.addLayer(name, "Concat", layerParams);
1268
layer_id[name] = id;
1269
1270
1271
int from = (type == "Concat" ? 1 : 0);
1272
int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);
1273
1274
// input(0) or input(n-1) is concat_dim
1275
for (int ii = from; ii < to; ii++)
1276
{
1277
Pin inp = parsePin(layer.input(ii));
1278
if (layer_id.find(inp.name) == layer_id.end())
1279
CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1280
connect(layer_id, dstNet, inp, id, ii - from);
1281
}
1282
}
1283
else if (type == "MaxPool")
1284
{
1285
layerParams.set("pool", "max");
1286
1287
setKSize(layerParams, layer);
1288
setStrides(layerParams, layer);
1289
setPadding(layerParams, layer);
1290
1291
int id = dstNet.addLayer(name, "Pooling", layerParams);
1292
layer_id[name] = id;
1293
1294
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1295
}
1296
else if (type == "AvgPool")
1297
{
1298
layerParams.set("pool", "ave");
1299
layerParams.set("ave_pool_padded_area", false);
1300
1301
setKSize(layerParams, layer);
1302
setStrides(layerParams, layer);
1303
setPadding(layerParams, layer);
1304
1305
int id = dstNet.addLayer(name, "Pooling", layerParams);
1306
layer_id[name] = id;
1307
1308
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1309
}
1310
else if (type == "Placeholder")
1311
{
1312
if (!hasLayerAttr(layer, "dtype") ||
1313
getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag.
1314
{
1315
netInputsNames.push_back(name);
1316
layer_id[name] = 0;
1317
}
1318
}
1319
else if (type == "Split") {
1320
// TODO: determining axis index remapping by input dimensions order of input blob
1321
// TODO: slicing input may be Const op
1322
// TODO: slicing kernels for convolutions - in current implementation it is impossible
1323
// TODO: add parsing num of slices parameter
1324
CV_Assert(layer.input_size() == 2);
1325
// num_split
1326
// 1st blob is dims tensor
1327
int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
1328
layerParams.set("axis", toNCHW(axis));
1329
1330
int id = dstNet.addLayer(name, "Slice", layerParams);
1331
layer_id[name] = id;
1332
1333
// one input only
1334
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1335
}
1336
else if (type == "Slice")
1337
{
1338
// op: "Slice"
1339
// input: "input_node"
1340
// input: "Slice/begin"
1341
// input: "Slice/size"
1342
CV_Assert(layer.input_size() == 3);
1343
Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));
1344
Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));
1345
CV_Assert_N(!begins.empty(), !sizes.empty());
1346
CV_CheckTypeEQ(begins.type(), CV_32SC1, "");
1347
CV_CheckTypeEQ(sizes.type(), CV_32SC1, "");
1348
1349
if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1350
{
1351
// Swap NHWC parameters' order to NCHW.
1352
std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));
1353
std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));
1354
std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));
1355
std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));
1356
}
1357
layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
1358
layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));
1359
1360
int id = dstNet.addLayer(name, "Slice", layerParams);
1361
layer_id[name] = id;
1362
1363
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1364
}
1365
else if (type == "Mul")
1366
{
1367
bool haveConst = false;
1368
for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
1369
{
1370
Pin input = parsePin(layer.input(ii));
1371
haveConst = value_id.find(input.name) != value_id.end();
1372
}
1373
CV_Assert(!haveConst || layer.input_size() == 2);
1374
1375
if (haveConst)
1376
{
1377
// Multiplication by constant.
1378
CV_Assert(layer.input_size() == 2);
1379
Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
1380
CV_Assert(scaleMat.type() == CV_32FC1);
1381
1382
int id;
1383
if (scaleMat.total() == 1) // is a scalar.
1384
{
1385
// Try to match with a LeakyRelu:
1386
// node {
1387
// name: "LeakyRelu/mul"
1388
// op: "Mul"
1389
// input: "LeakyRelu/alpha"
1390
// input: "input"
1391
// }
1392
// node {
1393
// name: "LeakyRelu/Maximum"
1394
// op: "Maximum"
1395
// input: "LeakyRelu/mul"
1396
// input: "input"
1397
// }
1398
StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1399
if (!next_layers.empty())
1400
{
1401
int maximumLayerIdx = next_layers[0].second;
1402
1403
CV_Assert(net.node(maximumLayerIdx).input_size() == 2);
1404
1405
// The input from the Mul layer can also be at index 1.
1406
int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1;
1407
1408
ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false);
1409
layers_to_ignore.insert(next_layers[0].first);
1410
1411
layerParams.set("negative_slope", scaleMat.at<float>(0));
1412
id = dstNet.addLayer(name, "ReLU", layerParams);
1413
}
1414
else
1415
{
1416
// Just a multiplication.
1417
layerParams.set("scale", scaleMat.at<float>(0));
1418
id = dstNet.addLayer(name, "Power", layerParams);
1419
}
1420
}
1421
else // is a vector
1422
{
1423
layerParams.blobs.resize(1, scaleMat);
1424
1425
StrIntVector next_layers = getNextLayers(net, name, "Add");
1426
if (!next_layers.empty())
1427
{
1428
layerParams.set("bias_term", true);
1429
layerParams.blobs.resize(2);
1430
1431
int weights_layer_index = next_layers[0].second;
1432
blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
1433
ExcludeLayer(net, weights_layer_index, 0, false);
1434
layers_to_ignore.insert(next_layers[0].first);
1435
}
1436
1437
if (hasLayerAttr(layer, "axis"))
1438
layerParams.set("axis", getLayerAttr(layer, "axis").i());
1439
1440
id = dstNet.addLayer(name, "Scale", layerParams);
1441
}
1442
layer_id[name] = id;
1443
1444
Pin inp0 = parsePin(layer.input(0));
1445
if (layer_id.find(inp0.name) != layer_id.end())
1446
// First operand is a constant.
1447
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1448
else
1449
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1450
}
1451
else
1452
{
1453
layerParams.set("operation", "prod");
1454
int id = dstNet.addLayer(name, "Eltwise", layerParams);
1455
layer_id[name] = id;
1456
1457
for (int ii = 0; ii < layer.input_size(); ii++)
1458
{
1459
Pin inp = parsePin(layer.input(ii));
1460
if (layer_id.find(inp.name) == layer_id.end())
1461
CV_Error(Error::StsError, "Input layer not found: " + inp.name);
1462
connect(layer_id, dstNet, inp, id, ii);
1463
}
1464
}
1465
}
1466
else if (type == "FusedBatchNorm")
1467
{
1468
// op: "FusedBatchNorm"
1469
// input: "input"
1470
// input: "BatchNorm/gamma"
1471
// input: "BatchNorm/beta"
1472
// input: "BatchNorm/moving_mean"
1473
// input: "BatchNorm/moving_variance"
1474
if (layer.input_size() != 5)
1475
CV_Error(Error::StsNotImplemented,
1476
"Expected gamma, beta, mean and std");
1477
Pin inpId = parsePin(layer.input(0));
1478
1479
bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
1480
1481
layerParams.blobs.resize(2);
1482
1483
const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);
1484
if (!gammaTensor.tensor_content().empty())
1485
{
1486
layerParams.blobs.resize(layerParams.blobs.size() + 1);
1487
layerParams.set("has_weight", true);
1488
blobFromTensor(gammaTensor, layerParams.blobs.back());
1489
}
1490
else
1491
layerParams.set("has_weight", false);
1492
1493
const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);
1494
if (!betaTensor.tensor_content().empty())
1495
{
1496
layerParams.blobs.resize(layerParams.blobs.size() + 1);
1497
layerParams.set("has_bias", true);
1498
blobFromTensor(betaTensor, layerParams.blobs.back());
1499
}
1500
else
1501
layerParams.set("has_bias", false);
1502
1503
Mat mean, std;
1504
if (isTraining)
1505
{
1506
if (layerParams.blobs.size() == 2)
1507
CV_Error(Error::StsNotImplemented, "Cannot determine number "
1508
"of parameters for batch normalization layer.");
1509
mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);
1510
std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);
1511
1512
// Add an extra layer: Mean-Variance normalization
1513
LayerParams mvnParams;
1514
std::string mvnName = name + "/MVN";
1515
CV_Assert(layer_id.find(mvnName) == layer_id.end());
1516
int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1517
layer_id[mvnName] = mvnId;
1518
connect(layer_id, dstNet, inpId, mvnId, 0);
1519
inpId = Pin(mvnName);
1520
}
1521
else
1522
{
1523
blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1524
blobFromTensor(getConstBlob(layer, value_id, 4), std);
1525
}
1526
layerParams.blobs[0] = mean;
1527
layerParams.blobs[1] = std;
1528
1529
if (hasLayerAttr(layer, "epsilon"))
1530
layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
1531
1532
int id = dstNet.addLayer(name, "BatchNorm", layerParams);
1533
layer_id[name] = id;
1534
1535
// one input only
1536
connect(layer_id, dstNet, inpId, id, 0);
1537
}
1538
else if (type == "Conv2DBackpropInput")
1539
{
1540
// op: "Conv2DBackpropInput"
1541
// input: "conv2d_transpose/output_shape"
1542
// input: "weights"
1543
// input: "input"
1544
if (layer.input_size() != 3)
1545
CV_Error(Error::StsNotImplemented,
1546
"Expected output shape, weights and input nodes");
1547
1548
layerParams.set("bias_term", false);
1549
layerParams.blobs.resize(1);
1550
1551
StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
1552
if (next_layers.size() == 1)
1553
{
1554
layerParams.set("bias_term", true);
1555
layerParams.blobs.resize(2);
1556
1557
int weights_layer_index = next_layers[0].second;
1558
1559
blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
1560
ExcludeLayer(net, weights_layer_index, 0, false);
1561
layers_to_ignore.insert(next_layers[0].first);
1562
}
1563
1564
kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1565
1566
const int* kshape = layerParams.blobs[0].size.p;
1567
const int kernelH = kshape[2];
1568
const int kernelW = kshape[3];
1569
layerParams.set("kernel_h", kernelH);
1570
layerParams.set("kernel_w", kernelW);
1571
layerParams.set("num_output", kshape[1]);
1572
1573
setStrides(layerParams, layer);
1574
setPadding(layerParams, layer);
1575
1576
// For convolution layer, output shape computes as
1577
// o = 1 + (i - k + 2*p) / s
1578
// i - input size, o - output size, k - kernel size, p - pad, s - stride
1579
// In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
1580
// considering that k is odd.
1581
// SAME: o = 1 + (i - 1) / s
1582
// VALID: o = 1 + i / s
1583
// Deconvolution's layer output shape computes as
1584
// SAME: o = 1 + (i - 1)*s
1585
// VALID: o = (i - 1)*s
1586
// If output_shape differs from formulas above then adjust padding is applied.
1587
1588
const int strideY = layerParams.get<int>("stride_h");
1589
const int strideX = layerParams.get<int>("stride_w");
1590
Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1591
const int outH = outShape.at<int>(1);
1592
const int outW = outShape.at<int>(2);
1593
if (layerParams.get<String>("pad_mode") == "SAME")
1594
{
1595
layerParams.set("adj_w", (outW - 1) % strideX);
1596
layerParams.set("adj_h", (outH - 1) % strideY);
1597
}
1598
else if (layerParams.get<String>("pad_mode") == "VALID")
1599
{
1600
layerParams.set("adj_w", (outW - kernelW) % strideX);
1601
layerParams.set("adj_h", (outH - kernelH) % strideY);
1602
}
1603
int id = dstNet.addLayer(name, "Deconvolution", layerParams);
1604
layer_id[name] = id;
1605
1606
// one input only
1607
connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
1608
}
1609
else if (type == "BlockLSTM")
1610
{
1611
// op: "BlockLSTM"
1612
// input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
1613
// input: "input"
1614
// input: "lstm_block_wrapper/zeros" (ignore)
1615
// input: "lstm_block_wrapper/zeros" (ignore)
1616
// input: "lstm_block_wrapper/kernel"
1617
// input: "lstm_block_wrapper/w_i_diag"
1618
// input: "lstm_block_wrapper/w_f_diag"
1619
// input: "lstm_block_wrapper/w_o_diag"
1620
// input: "lstm_block_wrapper/bias"
1621
if (layer.input_size() != 9)
1622
CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");
1623
1624
if (hasLayerAttr(layer, "forget_bias"))
1625
layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
1626
1627
if (hasLayerAttr(layer, "forget_bias"))
1628
{
1629
float cellClip = getLayerAttr(layer, "cell_clip").f();
1630
// Cell clip disabled if it's negative.
1631
if (cellClip >= 0)
1632
{
1633
layerParams.set("use_cell_clip", true);
1634
layerParams.set("cell_clip", cellClip);
1635
}
1636
}
1637
1638
Mat W, Wh, Wx, b;
1639
blobFromTensor(getConstBlob(layer, value_id, 4), W);
1640
blobFromTensor(getConstBlob(layer, value_id, 8), b);
1641
const int outSize = W.cols / 4;
1642
1643
// IGFO->IFOG
1644
float* weightData = (float*)W.data;
1645
for (int i = 0; i < W.rows; ++i)
1646
for (int j = 0; j < outSize; ++j)
1647
{
1648
std::swap(weightData[i * W.cols + 1 * outSize + j],
1649
weightData[i * W.cols + 2 * outSize + j]);
1650
std::swap(weightData[i * W.cols + 2 * outSize + j],
1651
weightData[i * W.cols + 3 * outSize + j]);
1652
}
1653
Wx = W.rowRange(0, W.rows - outSize).t();
1654
Wh = W.rowRange(W.rows - outSize, W.rows).t();
1655
1656
layerParams.blobs.resize(3);
1657
layerParams.blobs[0] = Wh;
1658
layerParams.blobs[1] = Wx;
1659
layerParams.blobs[2] = b;
1660
1661
if (hasLayerAttr(layer, "use_peephole"))
1662
{
1663
bool usePeephole = getLayerAttr(layer, "use_peephole").b();
1664
if (usePeephole)
1665
{
1666
layerParams.set("use_peephole", true);
1667
layerParams.blobs.resize(6);
1668
for (int i = 0; i < 3; ++i)
1669
{
1670
Mat w;
1671
blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
1672
w = w.reshape(1, w.total()); // Single column.
1673
w = Mat::diag(w); // Make a diagonal matrix.
1674
layerParams.blobs[3 + i] = w;
1675
}
1676
}
1677
}
1678
1679
int id = dstNet.addLayer(name, "LSTM", layerParams);
1680
layer_id[name] = id;
1681
1682
// one input only
1683
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
1684
data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1685
}
1686
else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear")
1687
{
1688
if (layer.input_size() == 2)
1689
{
1690
Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
1691
CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, "");
1692
layerParams.set("height", outSize.at<int>(0, 0));
1693
layerParams.set("width", outSize.at<int>(0, 1));
1694
}
1695
else if (layer.input_size() == 3)
1696
{
1697
Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));
1698
Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));
1699
CV_CheckTypeEQ(factorHeight.type(), CV_32SC1, ""); CV_CheckEQ(factorHeight.total(), (size_t)1, "");
1700
CV_CheckTypeEQ(factorWidth.type(), CV_32SC1, ""); CV_CheckEQ(factorWidth.total(), (size_t)1, "");
1701
layerParams.set("zoom_factor_x", factorWidth.at<int>(0));
1702
layerParams.set("zoom_factor_y", factorHeight.at<int>(0));
1703
}
1704
else
1705
CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);
1706
1707
if (type == "ResizeNearestNeighbor")
1708
layerParams.set("interpolation", "nearest");
1709
else
1710
layerParams.set("interpolation", "bilinear");
1711
1712
if (hasLayerAttr(layer, "align_corners"))
1713
layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
1714
1715
int id = dstNet.addLayer(name, "Resize", layerParams);
1716
layer_id[name] = id;
1717
1718
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1719
}
1720
else if (type == "L2Normalize")
1721
{
1722
// op: "L2Normalize"
1723
// input: "input"
1724
// input: "reduction_indices" (axis)
1725
CV_Assert(layer.input_size() == 2);
1726
Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
1727
CV_Assert(reductionIndices.type() == CV_32SC1);
1728
1729
const int numAxes = reductionIndices.total();
1730
if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
1731
for (int i = 0; i < numAxes; ++i)
1732
reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
1733
1734
cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
1735
for (int i = 1; i < numAxes; ++i)
1736
{
1737
CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
1738
// Axes have the same sign.
1739
CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
1740
}
1741
layerParams.set("start_axis", reductionIndices.at<int>(0));
1742
layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
1743
1744
int id = dstNet.addLayer(name, "Normalize", layerParams);
1745
layer_id[name] = id;
1746
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1747
}
1748
else if (type == "PriorBox")
1749
{
1750
if (hasLayerAttr(layer, "min_size"))
1751
layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
1752
if (hasLayerAttr(layer, "max_size"))
1753
layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
1754
if (hasLayerAttr(layer, "flip"))
1755
layerParams.set("flip", getLayerAttr(layer, "flip").b());
1756
if (hasLayerAttr(layer, "clip"))
1757
layerParams.set("clip", getLayerAttr(layer, "clip").b());
1758
if (hasLayerAttr(layer, "offset"))
1759
layerParams.set("offset", getLayerAttr(layer, "offset").f());
1760
if (hasLayerAttr(layer, "step"))
1761
layerParams.set("step", getLayerAttr(layer, "step").f());
1762
1763
const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
1764
"width", "height"};
1765
for (int i = 0; i < 5; ++i)
1766
{
1767
if (hasLayerAttr(layer, paramNames[i]))
1768
{
1769
Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());
1770
layerParams.set(paramNames[i],
1771
DictValue::arrayReal<float*>((float*)values.data, values.total()));
1772
}
1773
}
1774
int id = dstNet.addLayer(name, "PriorBox", layerParams);
1775
layer_id[name] = id;
1776
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1777
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1778
data_layouts[name] = DATA_LAYOUT_UNKNOWN;
1779
}
1780
else if (type == "Softmax")
1781
{
1782
if (hasLayerAttr(layer, "axis"))
1783
layerParams.set("axis", getLayerAttr(layer, "axis").i());
1784
1785
int id = dstNet.addLayer(name, "Softmax", layerParams);
1786
layer_id[name] = id;
1787
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1788
}
1789
else if (type == "CropAndResize")
1790
{
1791
// op: "CropAndResize"
1792
// input: "input"
1793
// input: "boxes"
1794
// input: "sizes"
1795
CV_Assert(layer.input_size() == 3);
1796
1797
Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
1798
CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, "");
1799
1800
layerParams.set("height", cropSize.at<int>(0));
1801
layerParams.set("width", cropSize.at<int>(1));
1802
1803
int id = dstNet.addLayer(name, "CropAndResize", layerParams);
1804
layer_id[name] = id;
1805
1806
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1807
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
1808
}
1809
else if (type == "Mean")
1810
{
1811
Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
1812
CV_Assert(indices.type() == CV_32SC1);
1813
1814
if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
1815
CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
1816
1817
layerParams.set("pool", "ave");
1818
layerParams.set("global_pooling", true);
1819
1820
int id = dstNet.addLayer(name, "Pooling", layerParams);
1821
layer_id[name] = id;
1822
1823
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1824
1825
// There are two attributes, "keepdims" and a deprecated "keep_dims".
1826
bool keepDims = false;
1827
if (hasLayerAttr(layer, "keepdims"))
1828
keepDims = getLayerAttr(layer, "keepdims").b();
1829
else if (hasLayerAttr(layer, "keep_dims"))
1830
keepDims = getLayerAttr(layer, "keep_dims").b();
1831
1832
if (!keepDims)
1833
{
1834
LayerParams flattenLp;
1835
std::string flattenName = name + "/flatten";
1836
CV_Assert(layer_id.find(flattenName) == layer_id.end());
1837
int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
1838
layer_id[flattenName] = flattenId;
1839
connect(layer_id, dstNet, Pin(name), flattenId, 0);
1840
}
1841
}
1842
else if (type == "ClipByValue")
1843
{
1844
// op: "ClipByValue"
1845
// input: "input"
1846
// input: "mix"
1847
// input: "max"
1848
CV_Assert(layer.input_size() == 3);
1849
1850
Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
1851
Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
1852
CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, "");
1853
CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, "");
1854
1855
layerParams.set("min_value", minValue.at<float>(0));
1856
layerParams.set("max_value", maxValue.at<float>(0));
1857
1858
int id = dstNet.addLayer(name, "ReLU6", layerParams);
1859
layer_id[name] = id;
1860
1861
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1862
}
1863
else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
1864
type == "Relu" || type == "Elu" ||
1865
type == "Identity" || type == "Relu6")
1866
{
1867
std::string dnnType = type;
1868
if (type == "Abs") dnnType = "AbsVal";
1869
else if (type == "Tanh") dnnType = "TanH";
1870
else if (type == "Relu") dnnType = "ReLU";
1871
else if (type == "Relu6") dnnType = "ReLU6";
1872
else if (type == "Elu") dnnType = "ELU";
1873
1874
int id = dstNet.addLayer(name, dnnType, layerParams);
1875
layer_id[name] = id;
1876
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
1877
}
1878
else
1879
{
1880
// Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
1881
// However we create a layer with the same type and rely that user defined a custom layer.
1882
1883
// All the attributes are added to LayerParams.
1884
google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
1885
for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
1886
ai != attr.end(); ++ai)
1887
{
1888
if (ai->second.value_case() == tensorflow::AttrValue::kS) // string
1889
layerParams.set(ai->first, ai->second.s());
1890
if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64
1891
layerParams.set(ai->first, ai->second.i());
1892
if (ai->second.value_case() == tensorflow::AttrValue::kF) // float
1893
layerParams.set(ai->first, ai->second.f());
1894
if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool
1895
layerParams.set(ai->first, ai->second.b());
1896
}
1897
1898
// All the Const input nodes are added to layer's blobs.
1899
std::vector<std::string> inputsNames;
1900
for (int i = 0; i < layer.input_size(); ++i)
1901
{
1902
// Check if input is a Const node.
1903
if (value_id.find(layer.input(i)) != value_id.end())
1904
{
1905
Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
1906
layerParams.blobs.push_back(blob);
1907
}
1908
else
1909
inputsNames.push_back(layer.input(i));
1910
}
1911
int id = dstNet.addLayer(name, type, layerParams);
1912
layer_id[name] = id;
1913
1914
for (int i = 0; i < inputsNames.size(); ++i)
1915
{
1916
connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
1917
}
1918
}
1919
}
1920
dstNet.setInputsNames(netInputsNames);
1921
}
1922
1923
} // namespace
1924
1925
#endif //HAVE_PROTOBUF
1926
1927
Net readNetFromTensorflow(const String &model, const String &config)
{
    // Read a serialized TensorFlow graph from a .pb file (plus an optional
    // text .pbtxt config) and convert it into an OpenCV dnn::Net.
    Net net;
    TFImporter importer(model.c_str(), config.c_str());
    importer.populateNet(net);
    return net;
}
1934
1935
Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,
                          const char* bufferConfig, size_t lenConfig)
{
    // Same as the file-based overload, but parses the model (and optional
    // config) from in-memory buffers instead of paths.
    Net net;
    TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);
    importer.populateNet(net);
    return net;
}
1943
1944
Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vector<uchar>& bufferConfig)
{
    // Forward to the raw-pointer overload.
    // Use data() rather than &buf[0]: operator[] on an empty vector is
    // undefined behavior, while data() is well-defined (may return a null
    // pointer when the vector is empty, with size 0).
    const char* bufferModelPtr = reinterpret_cast<const char*>(bufferModel.data());
    const char* bufferConfigPtr = bufferConfig.empty() ? NULL :
                                  reinterpret_cast<const char*>(bufferConfig.data());
    return readNetFromTensorflow(bufferModelPtr, bufferModel.size(),
                                 bufferConfigPtr, bufferConfig.size());
}
1952
1953
void writeTextGraph(const String& _model, const String& output)
1954
{
1955
String model = _model;
1956
const std::string modelExt = model.substr(model.rfind('.') + 1);
1957
if (modelExt != "pb")
1958
CV_Error(Error::StsNotImplemented, "Only TensorFlow models support export to text file");
1959
1960
tensorflow::GraphDef net;
1961
ReadTFNetParamsFromBinaryFileOrDie(model.c_str(), &net);
1962
1963
sortByExecutionOrder(net);
1964
1965
RepeatedPtrField<tensorflow::NodeDef>::iterator it;
1966
for (it = net.mutable_node()->begin(); it != net.mutable_node()->end(); ++it)
1967
{
1968
if (it->op() == "Const")
1969
{
1970
it->mutable_attr()->at("value").mutable_tensor()->clear_tensor_content();
1971
}
1972
}
1973
1974
std::string content;
1975
google::protobuf::TextFormat::PrintToString(net, &content);
1976
1977
std::ofstream ofs(output.c_str());
1978
ofs << content;
1979
ofs.close();
1980
}
1981
1982
CV__DNN_INLINE_NS_END
1983
}} // namespace
1984
1985