// Path: blob/master/modules/dnn/src/tensorflow/tf_importer.cpp
// 16345 views
// This file is part of OpenCV project.1// It is subject to the license terms in the LICENSE file found in the top-level directory2// of this distribution and at http://opencv.org/license.html.34// Copyright (C) 2016, Intel Corporation, all rights reserved.5// Third party copyrights are property of their respective owners.67/*8Implementation of Tensorflow models parser9*/1011#include "../precomp.hpp"1213#ifdef HAVE_PROTOBUF14#include "tf_io.hpp"1516#include <iostream>17#include <fstream>18#include <algorithm>19#include <string>20#include <queue>21#include "tf_graph_simplifier.hpp"22#endif2324namespace cv {25namespace dnn {26CV__DNN_INLINE_NS_BEGIN2728#if HAVE_PROTOBUF2930using ::google::protobuf::RepeatedField;31using ::google::protobuf::RepeatedPtrField;32using ::google::protobuf::Message;33using ::google::protobuf::Descriptor;34using ::google::protobuf::FieldDescriptor;35using ::google::protobuf::Reflection;3637namespace38{3940static int toNCHW(int idx)41{42CV_Assert(-4 <= idx && idx < 4);43if (idx == 0) return 0;44else if (idx > 0) return idx % 3 + 1;45else return (4 + idx) % 3 + 1;46}4748// This values are used to indicate layer output's data layout where it's possible.49enum DataLayout50{51DATA_LAYOUT_NHWC,52DATA_LAYOUT_NCHW,53DATA_LAYOUT_UNKNOWN,54DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d)55};5657typedef std::vector<std::pair<String, int> > StrIntVector;5859struct Pin60{61Pin(const std::string &_name, int _blobIndex = 0) :62name(_name), blobIndex(_blobIndex) {}6364Pin() :65name(""), blobIndex(-1) {}6667std::string name;68int blobIndex;69};7071void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape)72{73shape.clear();74if (tensor.has_tensor_shape())75{76const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape();77int i, n = _shape.dim_size();78if (n)79{80shape.resize(n);8182for (i = 0; i < n; i++)83shape[i] = (int)_shape.dim(i).size();84}85else86shape.resize(1, 1); // 
Scalar.87}88else89{90CV_Error(Error::StsError, "Unknown shape of input tensor");91}92}9394template <typename T>95void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)96{97MatShape shape;98blobShapeFromTensor(tensor, shape);99int dims = (int)shape.size();100101if (dims == 4)102{103// REORDER blob NHWC to NCHW104swap(shape[2], shape[3]); // NHCW105swap(shape[1], shape[2]); // NCHW106}107108dstBlob.create(shape, CV_32F);109110Mat tensorContent = getTensorContent(tensor);111int size = tensorContent.total();112CV_Assert(size == (int)dstBlob.total());113114float *dstData = dstBlob.ptr<float>();115const T *data = reinterpret_cast<const T*>(tensorContent.data);116117if (dims == 4)118{119int num = shape[0], channels = shape[1], height = shape[2], width = shape[3];120int total = num*channels*height*width;121for(int i_n = 0; i_n < shape[0]; i_n++) {122for(int i_c = 0; i_c < shape[1]; i_c++) {123for(int i_h = 0; i_h < shape[2]; i_h++) {124for(int i_w = 0; i_w < shape[3]; i_w++) {125int dst_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w;126int src_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w;127128CV_Assert(dst_i < total);129CV_Assert(src_i < total);130131dstData[dst_i] = data[src_i];132}133}134}135}136} else {137for (int i = 0; i < size; i++)138dstData[i] = data[i];139}140}141142void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)143{144switch (tensor.dtype()) {145case tensorflow::DT_FLOAT:146case tensorflow::DT_HALF:147parseTensor<float>(tensor, dstBlob);148break;149case tensorflow::DT_DOUBLE:150parseTensor<double>(tensor, dstBlob);151break;152default:153CV_Error(Error::StsError, "Tensor's data type is not supported");154break;155}156}157158void printList(const tensorflow::AttrValue::ListValue &val)159{160std::cout << "(";161for (int i = 0; i < val.i_size(); i++)162std::cout << " " << val.i(i);163std::cout << " )";164}165166void printTensorShape(const tensorflow::TensorShapeProto 
&shape)167{168std::cout << "[ ";169for (int d = 0; d < shape.dim_size(); d++)170std::cout << shape.dim(d).name() <<171":" << shape.dim(d).size() << " ";172std::cout << "]";173}174175void printTensor(const tensorflow::TensorProto &tensor)176{177printTensorShape(tensor.tensor_shape());178179if (tensor.tensor_content().empty())180return;181182switch (tensor.dtype())183{184case tensorflow::DT_FLOAT:185{186const float *data = reinterpret_cast<const float*>(tensor.tensor_content().c_str());187int size = tensor.tensor_content().size() / sizeof(float);188for (int i = 0; i < std::min(10, size); i++)189std::cout << " " << data[i];190if (size > 10)191std::cout << " ... " << size - 10 << " more";192break;193}194case tensorflow::DT_INT32:195{196const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());197int size = tensor.tensor_content().size() / sizeof(int);198for (int i = 0; i < std::min(10, size); i++)199std::cout << " " << data[i];200if (size > 10)201std::cout << " ... " << size - 10 << " more";202break;203}204default:205CV_Error(Error::StsError, "Tensor type is not supported");206break;207}208}209210void printLayerAttr(const tensorflow::NodeDef &layer)211{212std::cout << std::endl << layer.name() << ":" << layer.op();213for (int ii = 0; ii < layer.input_size(); ii++)214std::cout << "(" << layer.input(ii) << ")";215std::cout << std::endl;216google::protobuf::Map<std::string, tensorflow::AttrValue> attr217= layer.attr();218for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();219ai != attr.end(); ++ai)220{221std::cout << ai->first << ":";222if (ai->first == "dtype" || ai->first == "T")223std::cout << ai->second.i();224else if (ai->first == "padding")225std::cout << ai->second.s();226else if (ai->first == "transpose_a" || ai->first == "transpose_b")227std::cout << ai->second.b();228// else if (ai->first == "shape")229// printTensorShape(ai->second.shape());230else if (ai->first == "strides" || ai->first == 
"ksize")231printList(ai->second.list());232else233printTensor(ai->second.tensor());234std::cout << std::endl;235}236}237238bool hasLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)239{240google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();241return attr.find(name) != attr.end();242}243244const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, const std::string &name)245{246return layer.attr().at(name);247}248249static int getDataLayout(const tensorflow::NodeDef& layer)250{251if (hasLayerAttr(layer, "data_format"))252{253std::string format = getLayerAttr(layer, "data_format").s();254if (format == "NHWC" || format == "channels_last")255return DATA_LAYOUT_NHWC;256else if (format == "NCHW" || format == "channels_first")257return DATA_LAYOUT_NCHW;258else259CV_Error(Error::StsParseError, "Unknown data_format value: " + format);260}261return DATA_LAYOUT_UNKNOWN;262}263264static inline std::string getNodeName(const std::string& tensorName)265{266return tensorName.substr(0, tensorName.rfind(':'));267}268269static inline int getDataLayout(const std::string& layerName,270const std::map<String, int>& data_layouts)271{272std::map<String, int>::const_iterator it = data_layouts.find(getNodeName(layerName));273return it != data_layouts.end() ? 
it->second : DATA_LAYOUT_UNKNOWN;274}275276void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)277{278if (hasLayerAttr(layer, "strides"))279{280const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");281int dimX, dimY, dimC;282int layout = getDataLayout(layer);283if (layout == DATA_LAYOUT_NCHW)284{285dimC = 1; dimY = 2; dimX = 3;286}287else288{289dimY = 1; dimX = 2; dimC = 3;290}291if (val.list().i_size() != 4 ||292val.list().i(0) != 1 || val.list().i(dimC) != 1)293CV_Error(Error::StsError, "Unsupported strides");294layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));295layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));296}297}298299DictValue parseDims(const tensorflow::TensorProto &tensor) {300MatShape shape;301blobShapeFromTensor(tensor, shape);302int dims = (int)shape.size();303304CV_Assert(tensor.dtype() == tensorflow::DT_INT32);305CV_Assert(dims == 1);306307Mat values = getTensorContent(tensor);308CV_Assert(values.type() == CV_32SC1);309// TODO: add reordering shape if dims == 4310return DictValue::arrayInt((int*)values.data, values.total());311}312313void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)314{315if (hasLayerAttr(layer, "ksize"))316{317const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");318int dimX, dimY, dimC;319int layout = getDataLayout(layer);320if (layout == DATA_LAYOUT_NCHW)321{322dimC = 1; dimY = 2; dimX = 3;323}324else325{326dimY = 1; dimX = 2; dimC = 3;327}328if (val.list().i_size() != 4 ||329val.list().i(0) != 1 || val.list().i(dimC) != 1)330CV_Error(Error::StsError, "Unsupported ksize");331layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));332layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));333}334else335{336layerParams.set("kernel_h", 1);337layerParams.set("kernel_w", 1);338}339}340341void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)342{343if (hasLayerAttr(layer, 
"padding"))344layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());345}346347Pin parsePin(const std::string &name)348{349Pin pin(name);350351size_t delimiter_pos = name.find_first_of(":");352if (delimiter_pos != std::string::npos)353{354pin.name = name.substr(0, delimiter_pos);355std::istringstream(name.substr(delimiter_pos + 1)) >> pin.blobIndex;356}357358return pin;359}360361StrIntVector getNextLayers(const tensorflow::GraphDef& net, const String& layer_name, const String& type = "")362{363StrIntVector layers;364365for (int li = 0; li < net.node_size(); li++)366{367const tensorflow::NodeDef& layer = net.node(li);368for (int input_id = 0; input_id < layer.input_size(); input_id++) {369String input_op_name = parsePin(layer.input(input_id)).name;370bool type_ok = type.empty() ? true : type == layer.op();371if (input_op_name == layer_name && type_ok)372layers.push_back(std::make_pair(layer.name(), li));373}374}375376return layers;377}378379void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int input_blob_index, bool remove_from_net = true) {380String layer_name = net.node(layer_index).name();381StrIntVector layers = getNextLayers(net, layer_name);382383String removed_layer_input = net.node(layer_index).input(input_blob_index);384385for (size_t i = 0; i < layers.size(); i++)386{387tensorflow::NodeDef* layer = net.mutable_node(layers[i].second);388for (int input_id = 0; input_id < layer->input_size(); input_id++) {389String input_op_name = layer->input(input_id);390391if (input_op_name == layer_name) {392layer->set_input(input_id, removed_layer_input);393}394}395}396397if (remove_from_net)398net.mutable_node()->DeleteSubrange(layer_index, 1);399}400401class TFImporter {402public:403TFImporter(const char *model, const char *config = NULL);404TFImporter(const char *dataModel, size_t lenModel,405const char *dataConfig = NULL, size_t lenConfig = 0);406407void populateNet(Net dstNet);408409private:410void kernelFromTensor(const 
tensorflow::TensorProto &tensor, Mat &dstBlob);411412void connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,413const int input_layer_id, const int input_blob_id);414void connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,415const int input_layer_id, const int input_blobs_count);416const tensorflow::TensorProto& getConstBlob(const tensorflow::NodeDef &layer, std::map<String, int> const_layers,417int input_blob_index = -1, int* actual_inp_blob_idx = 0);418419420// Binary serialized TensorFlow graph includes weights.421tensorflow::GraphDef netBin;422// Optional text definition of TensorFlow graph. More flexible than binary format423// and may be used to build the network using binary format only as a weights storage.424// This approach is similar to Caffe's `.prorotxt` and `.caffemodel`.425tensorflow::GraphDef netTxt;426427std::vector<String> netInputsNames;428};429430TFImporter::TFImporter(const char *model, const char *config)431{432if (model && model[0])433ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);434if (config && config[0])435ReadTFNetParamsFromTextFileOrDie(config, &netTxt);436}437438TFImporter::TFImporter(const char *dataModel, size_t lenModel,439const char *dataConfig, size_t lenConfig)440{441if (dataModel != NULL && lenModel > 0)442ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin);443if (dataConfig != NULL && lenConfig > 0)444ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt);445}446447void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)448{449MatShape shape;450blobShapeFromTensor(tensor, shape);451int dims = (int)shape.size();452453// TODO: other blob types454CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||455tensor.dtype() == tensorflow::DT_HALF);456CV_Assert(dims == 4);457458// REORDER kernel HWIO to OIHW459swap(shape[0], shape[2]); // IWHO460swap(shape[1], shape[3]); // IOHW461swap(shape[0], shape[1]); 
// OIHW462463dstBlob.create(shape, CV_32F);464465Mat tensorContent = getTensorContent(tensor);466int size = tensorContent.total();467CV_Assert(size == (int)dstBlob.total());468469float *dstData = dstBlob.ptr<float>();470const float *data = reinterpret_cast<const float*>(tensorContent.data);471472int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];473int total = out_c*input_c*height*width;474for(int i_oc = 0; i_oc < out_c; i_oc++) {475for(int i_ic = 0; i_ic < input_c; i_ic++) {476for(int i_h = 0; i_h < height; i_h++) {477for(int i_w = 0; i_w < width; i_w++) {478int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;479int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;480CV_Assert(dst_i < total);481CV_Assert(src_i < total);482dstData[dst_i] = data[src_i];483}484}485}486}487}488489void TFImporter::connect(const std::map<String, int>& layers_name_id_map, Net& network, const Pin& outPin,490const int input_layer_id, const int input_blob_id)491{492std::map<String, int>::const_iterator it = layers_name_id_map.find(outPin.name);493if (it == layers_name_id_map.end())494CV_Error(Error::StsError, "Input layer not found: " + outPin.name);495496std::vector<String>::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name);497int blobIndex;498if (inpNameIt == netInputsNames.end())499blobIndex = outPin.blobIndex;500else501blobIndex = inpNameIt - netInputsNames.begin();502network.connect(it->second, blobIndex, input_layer_id, input_blob_id);503}504505void TFImporter::connectToAllBlobs(const std::map<String, int>& layer_id, Net& network, const Pin& outPin,506const int input_layer_id, const int input_blobs_count)507{508for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++)509connect(layer_id, network, outPin, input_layer_id, input_blob_id);510}511512const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map<String, 
int> const_layers,513int input_blob_index, int* actual_inp_blob_idx) {514if (input_blob_index == -1) {515for(int i = 0; i < layer.input_size(); i++) {516Pin input = parsePin(layer.input(i));517if (const_layers.find(input.name) != const_layers.end()) {518if (input_blob_index != -1)519CV_Error(Error::StsError, "More than one input is Const op");520521input_blob_index = i;522}523}524}525526if (input_blob_index == -1)527CV_Error(Error::StsError, "Const input blob for weights not found");528529Pin kernel_inp = parsePin(layer.input(input_blob_index));530if (const_layers.find(kernel_inp.name) == const_layers.end())531CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) +532"] for node [" + layer.name() + "] not found");533if (kernel_inp.blobIndex != 0)534CV_Error(Error::StsError, "Unsupported kernel input");535536if(actual_inp_blob_idx) {537*actual_inp_blob_idx = input_blob_index;538}539540int nodeIdx = const_layers.at(kernel_inp.name);541if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)542{543return netBin.node(nodeIdx).attr().at("value").tensor();544}545else546{547CV_Assert_N(nodeIdx < netTxt.node_size(),548netTxt.node(nodeIdx).name() == kernel_inp.name);549return netTxt.node(nodeIdx).attr().at("value").tensor();550}551}552553static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& const_layers,554std::set<String>& layers_to_ignore)555{556for (int li = 0; li < net.node_size(); li++)557{558const tensorflow::NodeDef &layer = net.node(li);559String name = layer.name();560String type = layer.op();561562if (type == "Dequantize")563{564// Example of Dequantize node:565// name: "conv2d_1/bias"566// op: "Dequantize"567// input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8)568// input: "conv2d_1/bias_quantized_min"569// input: "conv2d_1/bias_quantized_max"570// attr { key: "T" value { type: DT_QUINT8 } } (quantized type)571// attr { key: "mode" value { s: "MIN_FIRST" } } (quantization 
technique)572CV_Assert(layer.input_size() == 3);573for (int i = 0; i < 3; ++i)574CV_Assert(const_layers.find(layer.input(i)) != const_layers.end());575CV_Assert(hasLayerAttr(layer, "mode") &&576getLayerAttr(layer, "mode").s() == "MIN_FIRST");577578int tensorId = const_layers[layer.input(0)];579int minId = const_layers[layer.input(1)];580int maxId = const_layers[layer.input(2)];581582tensorflow::TensorProto* tensor = net.mutable_node(tensorId)583->mutable_attr()->at("value")584.mutable_tensor();585CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8);586587Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor());588Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor());589CV_Assert_N(qMin.total() == 1, qMin.type() == CV_32FC1,590qMax.total() == 1, qMax.type() == CV_32FC1);591592Mat content = getTensorContent(*tensor);593594float minVal = qMin.at<float>(0);595float rangeScale = (qMax.at<float>(0) - minVal) / 255;596CV_Assert(rangeScale >= 0);597content.convertTo(content, CV_32FC1, rangeScale,598rangeScale * cvRound(minVal / rangeScale));599600tensor->set_dtype(tensorflow::DT_FLOAT);601tensor->set_tensor_content(content.data, content.total() * content.elemSize1());602603net.mutable_node(tensorId)->set_name(name);604CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);605layers_to_ignore.insert(name);606continue;607}608else if (type != "Const")609continue; // only Const parameters are supported610611if (layer.attr().find("value") != layer.attr().end())612{613CV_Assert(const_layers.insert(std::make_pair(name, li)).second);614}615layers_to_ignore.insert(name);616}617}618619// If all inputs of specific layer have the same data layout we can say that620// this layer's output has this data layout too. 
Returns DATA_LAYOUT_UNKNOWN otherwise.621static int predictOutputDataLayout(const tensorflow::GraphDef& net,622const tensorflow::NodeDef& layer,623const std::map<String, int>& data_layouts)624{625int layout = getDataLayout(layer);626if (layout != DATA_LAYOUT_UNKNOWN)627return layout;628629// Determine layout by layer's inputs630std::map<String, int>::const_iterator it;631for (int i = 0, n = layer.input_size(); i < n; ++i)632{633it = data_layouts.find(getNodeName(layer.input(i)));634if (it != data_layouts.end())635{636if (layout != DATA_LAYOUT_UNKNOWN)637{638if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)639return DATA_LAYOUT_UNKNOWN;640}641else642layout = it->second;643}644}645646if (layout != DATA_LAYOUT_UNKNOWN)647return layout;648649// Determine layout by layer's consumers recursively.650it = data_layouts.find(layer.name());651CV_Assert(it != data_layouts.end());652return it->second;653}654655void TFImporter::populateNet(Net dstNet)656{657RemoveIdentityOps(netBin);658RemoveIdentityOps(netTxt);659660if (!netTxt.ByteSize())661simplifySubgraphs(netBin);662663std::set<String> layers_to_ignore;664665tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? 
netTxt : netBin;666667int layersSize = net.node_size();668669std::map<String, int> data_layouts;670// Pre-fill data layouts where they are set explicitly.671// Assuming that nodes are in topological order672for (int i = net.node_size() - 1; i >= 0; --i)673{674const tensorflow::NodeDef& layer = net.node(i);675std::string name = layer.name();676677int layout = getDataLayout(layer);678std::map<String, int>::iterator it = data_layouts.find(name);679if (it != data_layouts.end())680{681if (layout != DATA_LAYOUT_UNKNOWN)682{683if (it->second == DATA_LAYOUT_UNKNOWN)684it->second = layout;685else if (it->second != layout)686{687it->second = DATA_LAYOUT_UNKNOWN;688layout = DATA_LAYOUT_UNKNOWN;689}690}691else692layout = it->second;693}694else695data_layouts[name] = layout;696697// Specify input layers to have the same data layout.698for (int j = 0; j < layer.input_size(); ++j)699{700name = getNodeName(layer.input(j));701it = data_layouts.find(name);702if (it != data_layouts.end())703{704if (layout != DATA_LAYOUT_UNKNOWN)705{706if (it->second == DATA_LAYOUT_UNKNOWN)707it->second = layout;708else if (it->second != layout)709it->second = DATA_LAYOUT_UNKNOWN;710}711}712else713data_layouts[name] = layout;714}715}716717// find all Const layers for params718std::map<String, int> value_id;719// A map with constant blobs which are shared between multiple layers.720std::map<String, Mat> sharedWeights;721addConstNodes(netBin, value_id, layers_to_ignore);722addConstNodes(netTxt, value_id, layers_to_ignore);723724std::map<String, int> layer_id;725726for (int li = 0; li < layersSize; li++)727{728tensorflow::NodeDef layer = net.node(li);729String name = layer.name();730String type = layer.op();731LayerParams layerParams;732733if(layers_to_ignore.find(name) != layers_to_ignore.end())734continue;735736int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);737data_layouts[name] = predictedLayout;738739if (type == "Conv2D" || type == "SpaceToBatchND" || type == 
"DepthwiseConv2dNative" || type == "Pad")740{741// The first node of dilated convolution subgraph.742// Extract input node, dilation rate and paddings.743std::string input = layer.input(0);744StrIntVector next_layers;745if (type == "SpaceToBatchND" || type == "Pad")746{747next_layers = getNextLayers(net, name, "Conv2D");748if (next_layers.empty())749next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");750}751if (type == "SpaceToBatchND")752{753// op: "SpaceToBatchND"754// input: "input"755// input: "SpaceToBatchND/block_shape"756// input: "SpaceToBatchND/paddings"757CV_Assert(layer.input_size() == 3);758759DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));760CV_Assert(dilation.size() == 2);761layerParams.set("dilation_h", dilation.get<int>(0));762layerParams.set("dilation_w", dilation.get<int>(1));763764Mat paddings;765parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);766767// paddings is a 2x2 matrix: [[top, bot], [left, right]]768layerParams.set("pad_h", paddings.at<float>(0));769layerParams.set("pad_w", paddings.at<float>(2));770771CV_Assert(next_layers.size() == 1);772layer = net.node(next_layers[0].second);773layers_to_ignore.insert(next_layers[0].first);774name = layer.name();775type = layer.op();776}777else if (type == "Pad")778{779Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));780CV_Assert(paddings.type() == CV_32SC1);781if (paddings.total() == 8)782{783// Perhabs, we have NHWC padding dimensions order.784// N H W C785// 0 1 2 3 4 5 6 7786std::swap(paddings.at<int32_t>(2), paddings.at<int32_t>(6));787std::swap(paddings.at<int32_t>(3), paddings.at<int32_t>(7));788// N C W H789// 0 1 2 3 4 5 6 7790std::swap(paddings.at<int32_t>(4), paddings.at<int32_t>(6));791std::swap(paddings.at<int32_t>(5), paddings.at<int32_t>(7));792// N C H W793// 0 1 2 3 4 5 6 7794}795if (next_layers.empty() || paddings.total() != 8 ||796paddings.at<int32_t>(4) != paddings.at<int32_t>(5) ||797paddings.at<int32_t>(6) != 
paddings.at<int32_t>(7))798{799// Just a single padding layer.800layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));801802int id = dstNet.addLayer(name, "Padding", layerParams);803layer_id[name] = id;804805connect(layer_id, dstNet, parsePin(input), id, 0);806continue;807}808else809{810// Merge with subsequent convolutional layer.811CV_Assert(next_layers.size() == 1);812813layerParams.set("pad_h", paddings.at<int32_t>(4));814layerParams.set("pad_w", paddings.at<int32_t>(6));815816layer = net.node(next_layers[0].second);817layers_to_ignore.insert(next_layers[0].first);818name = layer.name();819type = layer.op();820}821}822823// For the object detection networks, TensorFlow Object Detection API824// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)825// order. We can manage it at DetectionOutput layer parsing predictions826// or shuffle last convolution's weights.827bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&828getLayerAttr(layer, "loc_pred_transposed").b();829830layerParams.set("bias_term", false);831layerParams.blobs.resize(1);832833next_layers = getNextLayers(net, name, "BiasAdd");834if (next_layers.size() == 1) {835layerParams.set("bias_term", true);836layerParams.blobs.resize(2);837838int weights_layer_index = next_layers[0].second;839840blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);841ExcludeLayer(net, weights_layer_index, 0, false);842layers_to_ignore.insert(next_layers[0].first);843844// Shuffle bias from yxYX to xyXY.845if (locPredTransposed)846{847const int numWeights = layerParams.blobs[1].total();848float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);849CV_Assert(numWeights % 4 == 0);850for (int i = 0; i < numWeights; i += 2)851{852std::swap(biasData[i], biasData[i + 1]);853}854}855}856857int kernelTensorInpId = -1;858const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, 
&kernelTensorInpId);859const String kernelTensorName = layer.input(kernelTensorInpId);860std::map<String, Mat>::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName);861if (sharedWeightsIt == sharedWeights.end())862{863kernelFromTensor(kernelTensor, layerParams.blobs[0]);864releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));865866int* kshape = layerParams.blobs[0].size.p;867const int outCh = kshape[0];868const int inCh = kshape[1];869const int height = kshape[2];870const int width = kshape[3];871if (type == "DepthwiseConv2dNative")872{873CV_Assert(!locPredTransposed);874const int chMultiplier = kshape[0];875876Mat copy = layerParams.blobs[0].clone();877float* src = (float*)copy.data;878float* dst = (float*)layerParams.blobs[0].data;879for (int i = 0; i < chMultiplier; ++i)880for (int j = 0; j < inCh; ++j)881for (int s = 0; s < height * width; ++s)882{883int src_i = (i * inCh + j) * height * width + s;884int dst_i = (j * chMultiplier + i) * height* width + s;885dst[dst_i] = src[src_i];886}887// TODO Use reshape instead888kshape[0] = inCh * chMultiplier;889kshape[1] = 1;890size_t* kstep = layerParams.blobs[0].step.p;891kstep[0] = kstep[1]; // fix steps too892}893894// Shuffle output channels from yxYX to xyXY.895if (locPredTransposed)896{897const int slice = height * width * inCh;898for (int i = 0; i < outCh; i += 2)899{900cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));901cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));902std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());903}904}905sharedWeights[kernelTensorName] = layerParams.blobs[0];906}907else908{909layerParams.blobs[0] = sharedWeightsIt->second;910}911912layerParams.set("kernel_h", layerParams.blobs[0].size[2]);913layerParams.set("kernel_w", layerParams.blobs[0].size[3]);914layerParams.set("num_output", layerParams.blobs[0].size[0]);915916setStrides(layerParams, layer);917if (!layerParams.has("pad_w") && 
!layerParams.has("pad_h"))918setPadding(layerParams, layer);919920// The final node of dilated convolution subgraph.921next_layers = getNextLayers(net, name, "BatchToSpaceND");922if (!next_layers.empty())923{924CV_Assert(next_layers.size() == 1);925ExcludeLayer(net, next_layers[0].second, 0, false);926layers_to_ignore.insert(next_layers[0].first);927}928929int id = dstNet.addLayer(name, "Convolution", layerParams);930layer_id[name] = id;931932// one input only933connect(layer_id, dstNet, parsePin(input), id, 0);934935936if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)937data_layouts[name] = DATA_LAYOUT_NHWC;938}939else if (type == "BiasAdd" || type == "Add")940{941bool haveConst = false;942for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)943{944Pin input = parsePin(layer.input(ii));945haveConst = value_id.find(input.name) != value_id.end();946}947CV_Assert(!haveConst || layer.input_size() == 2);948949if (haveConst)950{951Mat values = getTensorContent(getConstBlob(layer, value_id));952CV_Assert(values.type() == CV_32FC1);953954int id;955if (values.total() == 1) // is a scalar.956{957layerParams.set("shift", values.at<float>(0));958id = dstNet.addLayer(name, "Power", layerParams);959}960else // is a vector961{962layerParams.blobs.resize(1, values);963id = dstNet.addLayer(name, "Shift", layerParams);964}965layer_id[name] = id;966967// one input only968connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);969}970else971{972layerParams.set("operation", "sum");973int id = dstNet.addLayer(name, "Eltwise", layerParams);974layer_id[name] = id;975976for (int ii = 0; ii < layer.input_size(); ii++)977{978Pin inp = parsePin(layer.input(ii));979if (layer_id.find(inp.name) == layer_id.end())980CV_Error(Error::StsError, "Input layer not found: " + inp.name);981connect(layer_id, dstNet, inp, id, ii);982}983}984}985else if (type == "Sub")986{987bool haveConst = false;988for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)989{990Pin input = 
parsePin(layer.input(ii));991haveConst = value_id.find(input.name) != value_id.end();992}993CV_Assert(haveConst);994995Mat values = getTensorContent(getConstBlob(layer, value_id));996CV_Assert(values.type() == CV_32FC1);997values *= -1.0f;998999int id;1000if (values.total() == 1) // is a scalar.1001{1002layerParams.set("shift", values.at<float>(0));1003id = dstNet.addLayer(name, "Power", layerParams);1004}1005else // is a vector1006{1007layerParams.blobs.resize(1, values);1008id = dstNet.addLayer(name, "Shift", layerParams);1009}1010layer_id[name] = id;10111012// one input only1013connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1014}1015else if (type == "MatMul")1016{1017CV_Assert(layer.input_size() == 2);10181019// For the object detection networks, TensorFlow Object Detection API1020// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)1021// order. We can manage it at DetectionOutput layer parsing predictions1022// or shuffle last Faster-RCNN's matmul weights.1023bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&1024getLayerAttr(layer, "loc_pred_transposed").b();10251026layerParams.set("bias_term", false);1027layerParams.blobs.resize(1);10281029StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");1030if (next_layers.empty())1031{1032next_layers = getNextLayers(net, name, "Add");1033}1034if (next_layers.size() == 1) {1035layerParams.set("bias_term", true);1036layerParams.blobs.resize(2);10371038int weights_layer_index = next_layers[0].second;1039blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);1040ExcludeLayer(net, weights_layer_index, 0, false);1041layers_to_ignore.insert(next_layers[0].first);10421043if (locPredTransposed)1044{1045const int numWeights = layerParams.blobs[1].total();1046float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);1047CV_Assert(numWeights % 4 == 0);1048for (int i = 0; i < numWeights; i += 2)1049{1050std::swap(biasData[i], 
biasData[i + 1]);1051}1052}1053}10541055int kernel_blob_index = -1;1056const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index);1057blobFromTensor(kernelTensor, layerParams.blobs[0]);1058releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));10591060if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed1061Mat data = layerParams.blobs[0].t();1062layerParams.blobs[0] = data.clone();1063}10641065layerParams.set("num_output", layerParams.blobs[0].size[0]);1066if (locPredTransposed)1067{1068CV_Assert(layerParams.blobs[0].dims == 2);1069for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2)1070{1071cv::Mat src = layerParams.blobs[0].row(i);1072cv::Mat dst = layerParams.blobs[0].row(i + 1);1073std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());1074}1075}10761077int id = dstNet.addLayer(name, "InnerProduct", layerParams);1078layer_id[name] = id;10791080// one input only1081int input_blob_index = kernel_blob_index == 0 ? 
1 : 0;1082connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);1083data_layouts[name] = DATA_LAYOUT_PLANAR;1084}1085else if (type == "Reshape")1086{1087Pin inpId = parsePin(layer.input(0));1088int inpLayout = getDataLayout(layer.input(0), data_layouts);1089// There are two possible implementations: reshape an input using1090// predefined sizes or use a second input blob as a source of new shape.1091if (value_id.find(layer.input(1)) != value_id.end())1092{1093Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1));10941095if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC)1096{1097LayerParams permLP;1098int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.1099permLP.set("order", DictValue::arrayInt<int*>(order, 4));11001101std::string permName = name + "/nchw";1102CV_Assert(layer_id.find(permName) == layer_id.end());1103int permId = dstNet.addLayer(permName, "Permute", permLP);1104layer_id[permName] = permId;1105connect(layer_id, dstNet, inpId, permId, 0);1106inpId = Pin(permName);1107inpLayout = DATA_LAYOUT_NCHW;1108}1109else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC)1110{1111// NHWC->NCHW1112std::swap(*newShape.ptr<int32_t>(0, 2), *newShape.ptr<int32_t>(0, 3));1113std::swap(*newShape.ptr<int32_t>(0, 1), *newShape.ptr<int32_t>(0, 2));1114}1115layerParams.set("dim", DictValue::arrayInt<int*>(newShape.ptr<int>(), newShape.total()));11161117int id = dstNet.addLayer(name, "Reshape", layerParams);1118layer_id[name] = id;11191120// one input only1121connect(layer_id, dstNet, inpId, id, 0);1122data_layouts[name] = newShape.total() == 2 ? 
DATA_LAYOUT_PLANAR : inpLayout;1123}1124else1125{1126int id = dstNet.addLayer(name, "Reshape", layerParams);1127layer_id[name] = id;1128connect(layer_id, dstNet, inpId, id, 0);1129connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);1130data_layouts[name] = inpLayout;1131}1132}1133else if (type == "Flatten" || type == "Squeeze")1134{1135Pin inpId = parsePin(layer.input(0));1136int inpLayout = getDataLayout(layer.input(0), data_layouts);1137if (type == "Squeeze")1138{1139CV_Assert(hasLayerAttr(layer, "squeeze_dims"));1140const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims");1141if (inpLayout == DATA_LAYOUT_NHWC)1142{1143if (dims.list().i_size() != 2 || dims.list().i(0) != 1 || dims.list().i(1) != 2)1144CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");1145}1146else if (inpLayout == DATA_LAYOUT_NCHW)1147{1148if (dims.list().i_size() != 2 || dims.list().i(0) != 2 || dims.list().i(1) != 3)1149CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");1150}1151else1152CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration");1153}1154if (inpLayout == DATA_LAYOUT_NHWC)1155{1156LayerParams permLP;1157int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.1158permLP.set("order", DictValue::arrayInt<int*>(order, 4));11591160std::string permName = name + "/nchw";1161CV_Assert(layer_id.find(permName) == layer_id.end());1162int permId = dstNet.addLayer(permName, "Permute", permLP);1163layer_id[permName] = permId;1164connect(layer_id, dstNet, inpId, permId, 0);1165inpId = Pin(permName);1166}1167int id = dstNet.addLayer(name, "Flatten", layerParams);1168layer_id[name] = id;1169connect(layer_id, dstNet, inpId, id, 0);1170data_layouts[name] = DATA_LAYOUT_PLANAR;1171}1172else if (type == "Transpose")1173{1174Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));1175CV_Assert(perm.type() == CV_32SC1);1176int* permData = (int*)perm.data;1177if (perm.total() == 4)1178{1179// Only NHWC <-> NCHW 
permutations are allowed. OpenCV is always1180// keep NCHW layout this way.1181int inpLayout = getDataLayout(layer.input(0), data_layouts);1182if (inpLayout == DATA_LAYOUT_NHWC)1183{1184if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)1185{1186// in TensorFlow: NHWC->NCHW1187// in OpenCV: NCHW->NCHW1188data_layouts[name] = DATA_LAYOUT_NCHW;1189}1190else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)1191{1192// in TensorFlow: NHWC->NHWC1193// in OpenCV: NCHW->NCHW1194data_layouts[name] = DATA_LAYOUT_NHWC;1195}1196else1197CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");1198}1199else if (inpLayout == DATA_LAYOUT_NCHW)1200{1201if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)1202{1203// in TensorFlow: NCHW->NHWC1204// in OpenCV: NCHW->NCHW1205data_layouts[name] = DATA_LAYOUT_NHWC;1206}1207else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)1208{1209// in TensorFlow: NCHW->NCHW1210// in OpenCV: NCHW->NCHW1211data_layouts[name] = DATA_LAYOUT_NCHW;1212}1213else1214CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");1215}1216int id = dstNet.addLayer(name, "Identity", layerParams);1217layer_id[name] = id;1218connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1219}1220else1221{1222layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));12231224int id = dstNet.addLayer(name, "Permute", layerParams);1225layer_id[name] = id;12261227// one input only1228connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1229data_layouts[name] = DATA_LAYOUT_UNKNOWN;1230}1231}1232else if (type == "Const")1233{1234}1235else if (type == "LRN")1236{1237if(hasLayerAttr(layer, "alpha")) {1238layerParams.set("alpha", getLayerAttr(layer, "alpha").f());1239}1240if(hasLayerAttr(layer, "beta")) {1241layerParams.set("beta", getLayerAttr(layer, 
"beta").f());1242}1243if(hasLayerAttr(layer, "depth_radius")) {1244int radius = (int)getLayerAttr(layer, "depth_radius").i();1245layerParams.set("local_size", 2*radius + 1);1246}1247if(hasLayerAttr(layer, "bias")) {1248layerParams.set("bias", getLayerAttr(layer, "bias").f());1249}1250layerParams.set("norm_by_size", false);12511252int id = dstNet.addLayer(name, "LRN", layerParams);1253layer_id[name] = id;12541255connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());1256}1257else if (type == "Concat" || type == "ConcatV2")1258{1259int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);1260int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);12611262if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)1263axis = toNCHW(axis);1264layerParams.set("axis", axis);12651266int id = dstNet.addLayer(name, "Concat", layerParams);1267layer_id[name] = id;126812691270int from = (type == "Concat" ? 1 : 0);1271int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1);12721273// input(0) or input(n-1) is concat_dim1274for (int ii = from; ii < to; ii++)1275{1276Pin inp = parsePin(layer.input(ii));1277if (layer_id.find(inp.name) == layer_id.end())1278CV_Error(Error::StsError, "Input layer not found: " + inp.name);1279connect(layer_id, dstNet, inp, id, ii - from);1280}1281}1282else if (type == "MaxPool")1283{1284layerParams.set("pool", "max");12851286setKSize(layerParams, layer);1287setStrides(layerParams, layer);1288setPadding(layerParams, layer);12891290int id = dstNet.addLayer(name, "Pooling", layerParams);1291layer_id[name] = id;12921293connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());1294}1295else if (type == "AvgPool")1296{1297layerParams.set("pool", "ave");1298layerParams.set("ave_pool_padded_area", false);12991300setKSize(layerParams, layer);1301setStrides(layerParams, layer);1302setPadding(layerParams, layer);13031304int id = dstNet.addLayer(name, "Pooling", 
layerParams);1305layer_id[name] = id;13061307connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());1308}1309else if (type == "Placeholder")1310{1311if (!hasLayerAttr(layer, "dtype") ||1312getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag.1313{1314netInputsNames.push_back(name);1315layer_id[name] = 0;1316}1317}1318else if (type == "Split") {1319// TODO: determining axis index remapping by input dimensions order of input blob1320// TODO: slicing input may be Const op1321// TODO: slicing kernels for convolutions - in current implementation it is impossible1322// TODO: add parsing num of slices parameter1323CV_Assert(layer.input_size() == 2);1324// num_split1325// 1st blob is dims tensor1326int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);1327layerParams.set("axis", toNCHW(axis));13281329int id = dstNet.addLayer(name, "Slice", layerParams);1330layer_id[name] = id;13311332// one input only1333connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);1334}1335else if (type == "Slice")1336{1337// op: "Slice"1338// input: "input_node"1339// input: "Slice/begin"1340// input: "Slice/size"1341CV_Assert(layer.input_size() == 3);1342Mat begins = getTensorContent(getConstBlob(layer, value_id, 1));1343Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2));1344CV_Assert_N(!begins.empty(), !sizes.empty());1345CV_CheckTypeEQ(begins.type(), CV_32SC1, "");1346CV_CheckTypeEQ(sizes.type(), CV_32SC1, "");13471348if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)1349{1350// Swap NHWC parameters' order to NCHW.1351std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));1352std::swap(*begins.ptr<int32_t>(0, 1), *begins.ptr<int32_t>(0, 2));1353std::swap(*sizes.ptr<int32_t>(0, 2), *sizes.ptr<int32_t>(0, 3));1354std::swap(*sizes.ptr<int32_t>(0, 1), *sizes.ptr<int32_t>(0, 2));1355}1356layerParams.set("begin", DictValue::arrayInt((int*)begins.data, 
begins.total()));1357layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total()));13581359int id = dstNet.addLayer(name, "Slice", layerParams);1360layer_id[name] = id;13611362connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1363}1364else if (type == "Mul")1365{1366bool haveConst = false;1367for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)1368{1369Pin input = parsePin(layer.input(ii));1370haveConst = value_id.find(input.name) != value_id.end();1371}1372CV_Assert(!haveConst || layer.input_size() == 2);13731374if (haveConst)1375{1376// Multiplication by constant.1377CV_Assert(layer.input_size() == 2);1378Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));1379CV_Assert(scaleMat.type() == CV_32FC1);13801381int id;1382if (scaleMat.total() == 1) // is a scalar.1383{1384// Try to match with a LeakyRelu:1385// node {1386// name: "LeakyRelu/mul"1387// op: "Mul"1388// input: "LeakyRelu/alpha"1389// input: "input"1390// }1391// node {1392// name: "LeakyRelu/Maximum"1393// op: "Maximum"1394// input: "LeakyRelu/mul"1395// input: "input"1396// }1397StrIntVector next_layers = getNextLayers(net, name, "Maximum");1398if (!next_layers.empty())1399{1400int maximumLayerIdx = next_layers[0].second;14011402CV_Assert(net.node(maximumLayerIdx).input_size() == 2);14031404// The input from the Mul layer can also be at index 1.1405int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 
0 : 1;14061407ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false);1408layers_to_ignore.insert(next_layers[0].first);14091410layerParams.set("negative_slope", scaleMat.at<float>(0));1411id = dstNet.addLayer(name, "ReLU", layerParams);1412}1413else1414{1415// Just a multiplication.1416layerParams.set("scale", scaleMat.at<float>(0));1417id = dstNet.addLayer(name, "Power", layerParams);1418}1419}1420else // is a vector1421{1422layerParams.blobs.resize(1, scaleMat);14231424StrIntVector next_layers = getNextLayers(net, name, "Add");1425if (!next_layers.empty())1426{1427layerParams.set("bias_term", true);1428layerParams.blobs.resize(2);14291430int weights_layer_index = next_layers[0].second;1431blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());1432ExcludeLayer(net, weights_layer_index, 0, false);1433layers_to_ignore.insert(next_layers[0].first);1434}14351436if (hasLayerAttr(layer, "axis"))1437layerParams.set("axis", getLayerAttr(layer, "axis").i());14381439id = dstNet.addLayer(name, "Scale", layerParams);1440}1441layer_id[name] = id;14421443Pin inp0 = parsePin(layer.input(0));1444if (layer_id.find(inp0.name) != layer_id.end())1445// First operand is a constant.1446connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1447else1448connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);1449}1450else1451{1452layerParams.set("operation", "prod");1453int id = dstNet.addLayer(name, "Eltwise", layerParams);1454layer_id[name] = id;14551456for (int ii = 0; ii < layer.input_size(); ii++)1457{1458Pin inp = parsePin(layer.input(ii));1459if (layer_id.find(inp.name) == layer_id.end())1460CV_Error(Error::StsError, "Input layer not found: " + inp.name);1461connect(layer_id, dstNet, inp, id, ii);1462}1463}1464}1465else if (type == "FusedBatchNorm")1466{1467// op: "FusedBatchNorm"1468// input: "input"1469// input: "BatchNorm/gamma"1470// input: "BatchNorm/beta"1471// input: "BatchNorm/moving_mean"1472// input: 
"BatchNorm/moving_variance"1473if (layer.input_size() != 5)1474CV_Error(Error::StsNotImplemented,1475"Expected gamma, beta, mean and std");1476Pin inpId = parsePin(layer.input(0));14771478bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();14791480layerParams.blobs.resize(2);14811482const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1);1483if (!gammaTensor.tensor_content().empty())1484{1485layerParams.blobs.resize(layerParams.blobs.size() + 1);1486layerParams.set("has_weight", true);1487blobFromTensor(gammaTensor, layerParams.blobs.back());1488}1489else1490layerParams.set("has_weight", false);14911492const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2);1493if (!betaTensor.tensor_content().empty())1494{1495layerParams.blobs.resize(layerParams.blobs.size() + 1);1496layerParams.set("has_bias", true);1497blobFromTensor(betaTensor, layerParams.blobs.back());1498}1499else1500layerParams.set("has_bias", false);15011502Mat mean, std;1503if (isTraining)1504{1505if (layerParams.blobs.size() == 2)1506CV_Error(Error::StsNotImplemented, "Cannot determine number "1507"of parameters for batch normalization layer.");1508mean = Mat::zeros(1, layerParams.blobs[3].total(), CV_32F);1509std = Mat::ones(1, layerParams.blobs[3].total(), CV_32F);15101511// Add an extra layer: Mean-Variance normalization1512LayerParams mvnParams;1513std::string mvnName = name + "/MVN";1514CV_Assert(layer_id.find(mvnName) == layer_id.end());1515int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);1516layer_id[mvnName] = mvnId;1517connect(layer_id, dstNet, inpId, mvnId, 0);1518inpId = Pin(mvnName);1519}1520else1521{1522blobFromTensor(getConstBlob(layer, value_id, 3), mean);1523blobFromTensor(getConstBlob(layer, value_id, 4), std);1524}1525layerParams.blobs[0] = mean;1526layerParams.blobs[1] = std;15271528if (hasLayerAttr(layer, "epsilon"))1529layerParams.set("eps", getLayerAttr(layer, "epsilon").f());15301531int id = 
dstNet.addLayer(name, "BatchNorm", layerParams);1532layer_id[name] = id;15331534// one input only1535connect(layer_id, dstNet, inpId, id, 0);1536}1537else if (type == "Conv2DBackpropInput")1538{1539// op: "Conv2DBackpropInput"1540// input: "conv2d_transpose/output_shape"1541// input: "weights"1542// input: "input"1543if (layer.input_size() != 3)1544CV_Error(Error::StsNotImplemented,1545"Expected output shape, weights and input nodes");15461547layerParams.set("bias_term", false);1548layerParams.blobs.resize(1);15491550StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");1551if (next_layers.size() == 1)1552{1553layerParams.set("bias_term", true);1554layerParams.blobs.resize(2);15551556int weights_layer_index = next_layers[0].second;15571558blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);1559ExcludeLayer(net, weights_layer_index, 0, false);1560layers_to_ignore.insert(next_layers[0].first);1561}15621563kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);15641565const int* kshape = layerParams.blobs[0].size.p;1566const int kernelH = kshape[2];1567const int kernelW = kshape[3];1568layerParams.set("kernel_h", kernelH);1569layerParams.set("kernel_w", kernelW);1570layerParams.set("num_output", kshape[1]);15711572setStrides(layerParams, layer);1573setPadding(layerParams, layer);15741575// For convolution layer, output shape computes as1576// o = 1 + (i - k + 2*p) / s1577// i - input size, o - output size, k - kernel size, p - pad, s - stride1578// In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 21579// considering that k is odd.1580// SAME: o = 1 + (i - 1) / s1581// VALID: o = 1 + i / s1582// Deconvolution's layer output shape computes as1583// SAME: o = 1 + (i - 1)*s1584// VALID: o = (i - 1)*s1585// If output_shape differs from formulas above then adjust padding is applied.15861587const int strideY = layerParams.get<int>("stride_h");1588const int strideX = 
layerParams.get<int>("stride_w");1589Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));1590const int outH = outShape.at<int>(1);1591const int outW = outShape.at<int>(2);1592if (layerParams.get<String>("pad_mode") == "SAME")1593{1594layerParams.set("adj_w", (outW - 1) % strideX);1595layerParams.set("adj_h", (outH - 1) % strideY);1596}1597else if (layerParams.get<String>("pad_mode") == "VALID")1598{1599layerParams.set("adj_w", (outW - kernelW) % strideX);1600layerParams.set("adj_h", (outH - kernelH) % strideY);1601}1602int id = dstNet.addLayer(name, "Deconvolution", layerParams);1603layer_id[name] = id;16041605// one input only1606connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);1607}1608else if (type == "BlockLSTM")1609{1610// op: "BlockLSTM"1611// input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)1612// input: "input"1613// input: "lstm_block_wrapper/zeros" (ignore)1614// input: "lstm_block_wrapper/zeros" (ignore)1615// input: "lstm_block_wrapper/kernel"1616// input: "lstm_block_wrapper/w_i_diag"1617// input: "lstm_block_wrapper/w_f_diag"1618// input: "lstm_block_wrapper/w_o_diag"1619// input: "lstm_block_wrapper/bias"1620if (layer.input_size() != 9)1621CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");16221623if (hasLayerAttr(layer, "forget_bias"))1624layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());16251626if (hasLayerAttr(layer, "forget_bias"))1627{1628float cellClip = getLayerAttr(layer, "cell_clip").f();1629// Cell clip disabled if it's negative.1630if (cellClip >= 0)1631{1632layerParams.set("use_cell_clip", true);1633layerParams.set("cell_clip", cellClip);1634}1635}16361637Mat W, Wh, Wx, b;1638blobFromTensor(getConstBlob(layer, value_id, 4), W);1639blobFromTensor(getConstBlob(layer, value_id, 8), b);1640const int outSize = W.cols / 4;16411642// IGFO->IFOG1643float* weightData = (float*)W.data;1644for (int i = 0; i < W.rows; ++i)1645for (int j = 0; j < outSize; 
++j)1646{1647std::swap(weightData[i * W.cols + 1 * outSize + j],1648weightData[i * W.cols + 2 * outSize + j]);1649std::swap(weightData[i * W.cols + 2 * outSize + j],1650weightData[i * W.cols + 3 * outSize + j]);1651}1652Wx = W.rowRange(0, W.rows - outSize).t();1653Wh = W.rowRange(W.rows - outSize, W.rows).t();16541655layerParams.blobs.resize(3);1656layerParams.blobs[0] = Wh;1657layerParams.blobs[1] = Wx;1658layerParams.blobs[2] = b;16591660if (hasLayerAttr(layer, "use_peephole"))1661{1662bool usePeephole = getLayerAttr(layer, "use_peephole").b();1663if (usePeephole)1664{1665layerParams.set("use_peephole", true);1666layerParams.blobs.resize(6);1667for (int i = 0; i < 3; ++i)1668{1669Mat w;1670blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);1671w = w.reshape(1, w.total()); // Single column.1672w = Mat::diag(w); // Make a diagonal matrix.1673layerParams.blobs[3 + i] = w;1674}1675}1676}16771678int id = dstNet.addLayer(name, "LSTM", layerParams);1679layer_id[name] = id;16801681// one input only1682connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);1683data_layouts[name] = DATA_LAYOUT_UNKNOWN;1684}1685else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear")1686{1687if (layer.input_size() == 2)1688{1689Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));1690CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, "");1691layerParams.set("height", outSize.at<int>(0, 0));1692layerParams.set("width", outSize.at<int>(0, 1));1693}1694else if (layer.input_size() == 3)1695{1696Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1));1697Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2));1698CV_CheckTypeEQ(factorHeight.type(), CV_32SC1, ""); CV_CheckEQ(factorHeight.total(), (size_t)1, "");1699CV_CheckTypeEQ(factorWidth.type(), CV_32SC1, ""); CV_CheckEQ(factorWidth.total(), (size_t)1, "");1700layerParams.set("zoom_factor_x", 
factorWidth.at<int>(0));1701layerParams.set("zoom_factor_y", factorHeight.at<int>(0));1702}1703else1704CV_Assert(layer.input_size() == 2 || layer.input_size() == 3);17051706if (type == "ResizeNearestNeighbor")1707layerParams.set("interpolation", "nearest");1708else1709layerParams.set("interpolation", "bilinear");17101711if (hasLayerAttr(layer, "align_corners"))1712layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());17131714int id = dstNet.addLayer(name, "Resize", layerParams);1715layer_id[name] = id;17161717connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1718}1719else if (type == "L2Normalize")1720{1721// op: "L2Normalize"1722// input: "input"1723// input: "reduction_indices" (axis)1724CV_Assert(layer.input_size() == 2);1725Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));1726CV_Assert(reductionIndices.type() == CV_32SC1);17271728const int numAxes = reductionIndices.total();1729if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)1730for (int i = 0; i < numAxes; ++i)1731reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));17321733cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);1734for (int i = 1; i < numAxes; ++i)1735{1736CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);1737// Axes have the same sign.1738CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);1739}1740layerParams.set("start_axis", reductionIndices.at<int>(0));1741layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));17421743int id = dstNet.addLayer(name, "Normalize", layerParams);1744layer_id[name] = id;1745connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1746}1747else if (type == "PriorBox")1748{1749if (hasLayerAttr(layer, "min_size"))1750layerParams.set("min_size", getLayerAttr(layer, "min_size").i());1751if (hasLayerAttr(layer, "max_size"))1752layerParams.set("max_size", getLayerAttr(layer, "max_size").i());1753if (hasLayerAttr(layer, 
"flip"))1754layerParams.set("flip", getLayerAttr(layer, "flip").b());1755if (hasLayerAttr(layer, "clip"))1756layerParams.set("clip", getLayerAttr(layer, "clip").b());1757if (hasLayerAttr(layer, "offset"))1758layerParams.set("offset", getLayerAttr(layer, "offset").f());1759if (hasLayerAttr(layer, "step"))1760layerParams.set("step", getLayerAttr(layer, "step").f());17611762const std::string paramNames[] = {"variance", "aspect_ratio", "scales",1763"width", "height"};1764for (int i = 0; i < 5; ++i)1765{1766if (hasLayerAttr(layer, paramNames[i]))1767{1768Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor());1769layerParams.set(paramNames[i],1770DictValue::arrayReal<float*>((float*)values.data, values.total()));1771}1772}1773int id = dstNet.addLayer(name, "PriorBox", layerParams);1774layer_id[name] = id;1775connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1776connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);1777data_layouts[name] = DATA_LAYOUT_UNKNOWN;1778}1779else if (type == "Softmax")1780{1781if (hasLayerAttr(layer, "axis"))1782layerParams.set("axis", getLayerAttr(layer, "axis").i());17831784int id = dstNet.addLayer(name, "Softmax", layerParams);1785layer_id[name] = id;1786connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());1787}1788else if (type == "CropAndResize")1789{1790// op: "CropAndResize"1791// input: "input"1792// input: "boxes"1793// input: "sizes"1794CV_Assert(layer.input_size() == 3);17951796Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));1797CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, "");17981799layerParams.set("height", cropSize.at<int>(0));1800layerParams.set("width", cropSize.at<int>(1));18011802int id = dstNet.addLayer(name, "CropAndResize", layerParams);1803layer_id[name] = id;18041805connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1806connect(layer_id, dstNet, parsePin(layer.input(1)), id, 
1);1807}1808else if (type == "Mean")1809{1810Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));1811CV_Assert(indices.type() == CV_32SC1);18121813if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)1814CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");18151816layerParams.set("pool", "ave");1817layerParams.set("global_pooling", true);18181819int id = dstNet.addLayer(name, "Pooling", layerParams);1820layer_id[name] = id;18211822connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);18231824// There are two attributes, "keepdims" and a deprecated "keep_dims".1825bool keepDims = false;1826if (hasLayerAttr(layer, "keepdims"))1827keepDims = getLayerAttr(layer, "keepdims").b();1828else if (hasLayerAttr(layer, "keep_dims"))1829keepDims = getLayerAttr(layer, "keep_dims").b();18301831if (!keepDims)1832{1833LayerParams flattenLp;1834std::string flattenName = name + "/flatten";1835CV_Assert(layer_id.find(flattenName) == layer_id.end());1836int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);1837layer_id[flattenName] = flattenId;1838connect(layer_id, dstNet, Pin(name), flattenId, 0);1839}1840}1841else if (type == "ClipByValue")1842{1843// op: "ClipByValue"1844// input: "input"1845// input: "mix"1846// input: "max"1847CV_Assert(layer.input_size() == 3);18481849Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));1850Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));1851CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, "");1852CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, "");18531854layerParams.set("min_value", minValue.at<float>(0));1855layerParams.set("max_value", maxValue.at<float>(0));18561857int id = dstNet.addLayer(name, "ReLU6", layerParams);1858layer_id[name] = id;18591860connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);1861}1862else if (type == "Abs" || type == 
"Tanh" || type == "Sigmoid" ||1863type == "Relu" || type == "Elu" ||1864type == "Identity" || type == "Relu6")1865{1866std::string dnnType = type;1867if (type == "Abs") dnnType = "AbsVal";1868else if (type == "Tanh") dnnType = "TanH";1869else if (type == "Relu") dnnType = "ReLU";1870else if (type == "Relu6") dnnType = "ReLU6";1871else if (type == "Elu") dnnType = "ELU";18721873int id = dstNet.addLayer(name, dnnType, layerParams);1874layer_id[name] = id;1875connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());1876}1877else1878{1879// Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.1880// However we create a layer with the same type and rely that user defined a custom layer.18811882// All the attributes are added to LayerParams.1883google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();1884for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();1885ai != attr.end(); ++ai)1886{1887if (ai->second.value_case() == tensorflow::AttrValue::kS) // string1888layerParams.set(ai->first, ai->second.s());1889if (ai->second.value_case() == tensorflow::AttrValue::kI) // int641890layerParams.set(ai->first, ai->second.i());1891if (ai->second.value_case() == tensorflow::AttrValue::kF) // float1892layerParams.set(ai->first, ai->second.f());1893if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool1894layerParams.set(ai->first, ai->second.b());1895}18961897// All the Const input nodes are added to layer's blobs.1898std::vector<std::string> inputsNames;1899for (int i = 0; i < layer.input_size(); ++i)1900{1901// Check if input is a Const node.1902if (value_id.find(layer.input(i)) != value_id.end())1903{1904Mat blob = getTensorContent(getConstBlob(layer, value_id, i));1905layerParams.blobs.push_back(blob);1906}1907else1908inputsNames.push_back(layer.input(i));1909}1910int id = dstNet.addLayer(name, type, layerParams);1911layer_id[name] = 
id;19121913for (int i = 0; i < inputsNames.size(); ++i)1914{1915connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);1916}1917}1918}1919dstNet.setInputsNames(netInputsNames);1920}19211922} // namespace19231924#endif //HAVE_PROTOBUF19251926Net readNetFromTensorflow(const String &model, const String &config)1927{1928TFImporter importer(model.c_str(), config.c_str());1929Net net;1930importer.populateNet(net);1931return net;1932}19331934Net readNetFromTensorflow(const char* bufferModel, size_t lenModel,1935const char* bufferConfig, size_t lenConfig)1936{1937TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig);1938Net net;1939importer.populateNet(net);1940return net;1941}19421943Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vector<uchar>& bufferConfig)1944{1945const char* bufferModelPtr = reinterpret_cast<const char*>(&bufferModel[0]);1946const char* bufferConfigPtr = bufferConfig.empty() ? NULL :1947reinterpret_cast<const char*>(&bufferConfig[0]);1948return readNetFromTensorflow(bufferModelPtr, bufferModel.size(),1949bufferConfigPtr, bufferConfig.size());1950}19511952void writeTextGraph(const String& _model, const String& output)1953{1954String model = _model;1955const std::string modelExt = model.substr(model.rfind('.') + 1);1956if (modelExt != "pb")1957CV_Error(Error::StsNotImplemented, "Only TensorFlow models support export to text file");19581959tensorflow::GraphDef net;1960ReadTFNetParamsFromBinaryFileOrDie(model.c_str(), &net);19611962sortByExecutionOrder(net);19631964RepeatedPtrField<tensorflow::NodeDef>::iterator it;1965for (it = net.mutable_node()->begin(); it != net.mutable_node()->end(); ++it)1966{1967if (it->op() == "Const")1968{1969it->mutable_attr()->at("value").mutable_tensor()->clear_tensor_content();1970}1971}19721973std::string content;1974google::protobuf::TextFormat::PrintToString(net, &content);19751976std::ofstream ofs(output.c_str());1977ofs << 
content;1978ofs.close();1979}19801981CV__DNN_INLINE_NS_END1982}} // namespace198319841985