// Path: modules/dnn/src/torch/torch_importer.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if
advised of the possibility of such damage.38//39//M*/4041#include "../precomp.hpp"42#include <limits>43#include <set>44#include <map>45#include <algorithm>46#include <iostream>47#include <fstream>4849#include "THDiskFile.h"5051namespace cv {52namespace dnn {53CV__DNN_INLINE_NS_BEGIN5455using namespace TH;5657//#ifdef NDEBUG58static bool dbgPrint = false;59//#else60//static bool dbgPrint = true;61//#endif6263enum LuaType64{65TYPE_NIL = 0,66TYPE_NUMBER = 1,67TYPE_STRING = 2,68TYPE_TABLE = 3,69TYPE_TORCH = 4,70TYPE_BOOLEAN = 5,71TYPE_FUNCTION = 6,72TYPE_RECUR_FUNCTION = 8,73LEGACY_TYPE_RECUR_FUNCTION = 774};7576// We use OpenCV's types to manage CV_ELEM_SIZE.77enum TorchType78{79TYPE_DOUBLE = CV_64F,80TYPE_FLOAT = CV_32F,81TYPE_BYTE = CV_8U,82TYPE_CHAR = CV_8S,83TYPE_SHORT = CV_16S,84TYPE_INT = CV_32S,85TYPE_LONG = CV_32SC286};8788template<typename T>89static String toString(const T &v)90{91std::ostringstream ss;92ss << v;93return ss.str();94}9596static inline bool startsWith(const String &str, const char *substr)97{98return str.find(substr) == 0;99}100101static inline bool endsWith(const String &str, const char *substr)102{103return str.rfind(substr) == str.length() - strlen(substr);104}105106struct TorchImporter107{108typedef std::map<String, std::pair<int, Mat> > TensorsMap;109Net net;110111cv::Ptr<THFile> file;112std::set<int> readedIndexes;113std::map<int, Mat> storages;114std::map<int, Mat> tensors;115// Stack with numbers of unconnected layers per scope (Sequential, ConcatTable etc.)116std::vector<int> numUnconnectedLayers;117118struct Module119{120String thName, apiType;121dnn::LayerParams params;122std::vector<cv::Ptr<Module> > modules;123124Module(const String &_thName, const String &_apiType = String())125: thName(_thName), apiType(_apiType) {}126};127128Module *rootModule;129Module *curModule;130int moduleCounter;131132TorchImporter(String filename, bool isBinary)133{134CV_TRACE_FUNCTION();135136rootModule = curModule = NULL;137moduleCounter = 0;138139file 
= cv::Ptr<THFile>(THDiskFile_new(filename, "r", 0), THFile_free);140CV_Assert(file && THFile_isOpened(file));141142if (isBinary)143THFile_binary(file);144else145THFile_ascii(file);146}147148/* Simple readers */149150inline int readInt()151{152return THFile_readIntScalar(file);153}154155inline long readLong()156{157return THFile_readLongScalar(file);158}159160inline bool readBool()161{162return readInt() != 0;163}164165inline double readDouble()166{167return THFile_readDoubleScalar(file);168}169170inline String readString()171{172int size = THFile_readIntScalar(file);173String str(size, '\0');174THFile_readCharRaw(file, const_cast<char*>(str.c_str()), size);175return str;176}177178inline String readTorchClassName()179{180String version = readString();181return startsWith(version, "V ") ? readString() : version;182}183184inline void readFunction()185{186readString();187readObject();188}189190void readTable(int index = -1)191{192index = (index < 0) ? readInt() : index;193194if (readedIndexes.count(index))195return;196197readedIndexes.insert(index);198199int size = readInt();200201for (int i = 0; i < size; i++)202{203readObject(); //key204readObject(); //value205}206}207208/* Special readers */209210static inline int parseTorchType(const String &str, const char *suffix, const char *prefix = "torch.")211{212if (startsWith(str, prefix) && endsWith(str, suffix))213{214String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));215216if (typeStr == "Double")217return TYPE_DOUBLE;218else if (typeStr == "Float" || typeStr == "Cuda")219return TYPE_FLOAT;220else if (typeStr == "Byte")221return TYPE_BYTE;222else if (typeStr == "Char")223return TYPE_CHAR;224else if (typeStr == "Short")225return TYPE_SHORT;226else if (typeStr == "Int")227return TYPE_INT;228else if (typeStr == "Long")229return TYPE_LONG;230else231CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");232}233234return -1;235}236237static 
int parseTensorType(const String &className)238{239return parseTorchType(className, "Tensor");240}241242static int parseStorageType(const String &className)243{244return parseTorchType(className, "Storage");245}246247void readTorchStorage(int index, int type = -1)248{249long size = readLong();250Mat storageMat;251252switch (type)253{254case TYPE_FLOAT:255storageMat.create(1, size, CV_32F);256THFile_readFloatRaw(file, (float*)storageMat.data, size);257break;258case TYPE_DOUBLE:259storageMat.create(1, size, CV_64F);260THFile_readDoubleRaw(file, (double*)storageMat.data, size);261break;262case TYPE_CHAR:263storageMat.create(1, size, CV_8S);264THFile_readByteRaw(file, (uchar*)storageMat.data, size);265break;266case TYPE_BYTE:267storageMat.create(1, size, CV_8U);268THFile_readByteRaw(file, (uchar*)storageMat.data, size);269break;270case TYPE_SHORT:271storageMat.create(1, size, CV_16S);272THFile_readShortRaw(file, (short*)storageMat.data, size);273break;274case TYPE_INT:275storageMat.create(1, size, CV_32S);276THFile_readIntRaw(file, (int*)storageMat.data, size);277break;278case TYPE_LONG:279{280storageMat.create(1, size, CV_64F); //handle LongStorage as CV_64F Mat281double *buf = storageMat.ptr<double>();282THFile_readLongRaw(file, (int64*)buf, size);283284for (size_t i = (size_t)size; i-- > 0; )285buf[i] = ((int64*)buf)[i];286break;287}288default:289CV_Error(Error::StsInternal, "");290break;291}292293storages.insert(std::make_pair(index, storageMat));294}295296void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)297{298int luaType = readInt();299int index = readInt();300301CV_Assert(luaType == TYPE_TABLE && readedIndexes.count(index) == 0);302readedIndexes.insert(index);303304long fpos;305int numPairs = readInt();306307for (int i = 0; i < numPairs; i++)308{309fpos = THFile_position(file);310int ktype = readInt();311312if (ktype != TYPE_STRING) //skip non-string fileds313{314THFile_seek(file, fpos);315readObject(); //key316readObject(); 
//value317continue;318}319320String key = readString();321if (dbgPrint)322std::cout << i << "th key: " << key << "\n";323324fpos = THFile_position(file);325int vtype = readInt();326327if (vtype == TYPE_TORCH)328{329int index = readInt();330int numModules = curModule->modules.size();331readTorchObject(index);332333if (tensors.count(index)) //tensor was read334{335tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));336}337else if (storages.count(index)) //storage was read338{339Mat &matStorage = storages[index];340Mat matCasted;341matStorage.convertTo(matCasted, CV_64F);342343DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());344scalarParams.set(key, scalar);345}346else347{348// Only tensors and scalars are supported for table fields.349// i.e. nn.Inception has field `transfer` which is an350// activation layer. So we remove added modules as readTorchObject(index).351while (curModule->modules.size() > numModules)352curModule->modules.pop_back();353}354}355else if (vtype == TYPE_NUMBER)356{357scalarParams.set(key, readDouble());358}359else if (vtype == TYPE_STRING)360{361scalarParams.set(key, readString());362}363else if (vtype == TYPE_BOOLEAN)364{365scalarParams.set(key, readBool());366}367else368{369THFile_seek(file, fpos);370readObject();371}372}373374//Debug output375if (dbgPrint)376{377std::cout << "scalarParams:\n";378std::cout << scalarParams;379380std::cout << "#" << tensorParams.size() << " tensorParams:\n";381std::map<String,std::pair<int, Mat> >::const_iterator it;382for (it = tensorParams.begin(); it != tensorParams.end(); it++)383std::cout << it->first << ": Tensor " << it->second.second.size << "\n";384}385}386387void readTorchTensor(int indexTensor, int typeTensor)388{389int ndims = readInt();390AutoBuffer<int64, 4> sizes(ndims);391AutoBuffer<int64, 4> steps(ndims);392THFile_readLongRaw(file, sizes.data(), ndims);393THFile_readLongRaw(file, steps.data(), ndims);394long offset = readLong() - 
1;395396//read Storage397int typeidx = readInt();398CV_Assert(typeidx == TYPE_TORCH || (typeidx == TYPE_NIL && ndims == 0));399400if (typeidx == TYPE_NIL)401{402tensors.insert(std::make_pair(indexTensor, Mat()));403return;404}405406int indexStorage = readInt();407if (readedIndexes.count(indexStorage) == 0)408{409String className = readTorchClassName();410int typeStorage = parseStorageType(className);411CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);412readTorchStorage(indexStorage, typeStorage);413typeTensor = storages[indexStorage].type();414readedIndexes.insert(indexStorage);415}416417//small check418size_t requireElems = (size_t)offset + (size_t)steps[0] * (size_t)sizes[0];419size_t storageElems = storages[indexStorage].total();420if (requireElems > storageElems)421CV_Error(Error::StsBadSize, "Storage has insufficient number of elements for requested Tensor");422423//convert sizes424AutoBuffer<int, 4> isizes(ndims);425AutoBuffer<size_t, 4> ssteps(ndims);426for (int i = ndims - 1; i >= 0; i--)427{428isizes[i] = (int)sizes[i];429ssteps[i] = (size_t)steps[i] * CV_ELEM_SIZE(typeTensor);430}431432//allocate Blob433Mat srcMat(ndims, isizes.data(), typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), ssteps.data());434int dstType = CV_32F;435436Mat blob;437srcMat.convertTo(blob, dstType);438439tensors.insert(std::make_pair(indexTensor, blob));440}441442static bool isNNClass(const String &className, String &nnName)443{444const char *prefixes[] = {"nn.", "cunn.", "cudnn.", "fbcunn.", NULL};445446for (int i = 0; prefixes[i]; i++)447{448if (startsWith(className, prefixes[i]))449{450nnName = className.substr(strlen(prefixes[i]));451return true;452}453}454455return false;456}457458static void convertTorchKernelsParams(const Dict &torchParams, cv::dnn::LayerParams &layerParams)459{460layerParams.set("kernel_h", torchParams.get<int>("kH"));461layerParams.set("kernel_w", torchParams.get<int>("kW"));462layerParams.set("stride_h", 
torchParams.get<int>("dH"));463layerParams.set("stride_w", torchParams.get<int>("dW"));464layerParams.set("pad_h", torchParams.get<int>("padH", 0));465layerParams.set("pad_w", torchParams.get<int>("padW", 0));466}467468void readTorchObject(int index)469{470if(readedIndexes.count(index))471return;472473String className = readTorchClassName();474String nnName;475476if (dbgPrint)477std::cout << "Class: " << className << std::endl;478479int type;480if ( (type = parseTensorType(className)) >= 0 ) //is Tensor481{482readTorchTensor(index, type);483}484else if ( (type = parseStorageType(className)) >= 0 ) //is Storage485{486readTorchStorage(index, type);487}488else if (isNNClass(className, nnName))489{490Dict scalarParams;491TensorsMap tensorParams;492493cv::Ptr<Module> newModule(new Module(nnName));494cv::dnn::LayerParams &layerParams = newModule->params;495496layerParams.set("torch_index", index);497498if (nnName == "Sequential" || nnName == "Parallel" ||499nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||500nnName == "DepthConcat" || nnName == "Inception")501{502Module *parentModule = curModule;503curModule->modules.push_back(newModule);504curModule = newModule;505readTorchTable(scalarParams, tensorParams);506curModule = parentModule;507508if (nnName == "Parallel")509{510layerParams.set("inputDimension", scalarParams.get<int>("inputDimension"));511layerParams.set("outputDimension", scalarParams.get<int>("outputDimension"));512}513else if (nnName == "Concat" || nnName == "JoinTable" || nnName == "DepthConcat")514{515layerParams.set("dimension", scalarParams.get<int>("dimension"));516}517}518else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")519{520newModule->apiType = "Convolution";521readTorchTable(scalarParams, tensorParams);522523CV_Assert(tensorParams.count("weight"));524layerParams.blobs.push_back(tensorParams["weight"].second);525526bool bias = tensorParams.count("bias") != 0;527layerParams.set("bias_term", 
bias);528if (bias)529layerParams.blobs.push_back(tensorParams["bias"].second);530531layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));532convertTorchKernelsParams(scalarParams, layerParams);533534if (nnName == "SpatialConvolutionMM")535{536// Split weights from a [ outCh x inCh*kH*kW ] 2D matrix537// onto a 4D [ outCh x inCh x kH x kW ] blob.538CV_Assert(layerParams.blobs[0].dims == 2);539const int kernel = layerParams.blobs[0].size[1]; // inCh * kH * kW540MatShape kernelShape(4);541kernelShape[0] = layerParams.blobs[0].size[0]; // outCh.542kernelShape[2] = layerParams.get<int>("kernel_h");543kernelShape[3] = layerParams.get<int>("kernel_w");544kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]); // inCh.545layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);546}547curModule->modules.push_back(newModule);548}549else if (nnName == "SpatialLPPooling")550{551// nn.Sequential {552// [input -> (1) -> (2) -> output]553// (1): nn.Sequential {554// [input -> (1) -> (2) -> (3) -> (4) -> output]555// (1): nn.Power556// (2): nn.SpatialAveragePooling(...)557// (3): nn.MulConstant558// (4): nn.Power559// }560// (2): nn.Sigmoid561// }562// nn.SpatialLPPooling is just a table so we skip it.563readTorchTable(scalarParams, tensorParams);564}565else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")566{567newModule->apiType = "Pooling";568readTorchTable(scalarParams, tensorParams);569570if (nnName == "SpatialMaxPooling") {571layerParams.set("pool", "MAX");572layerParams.set("indices_blob_id", tensorParams["indices"].first);573}574if (nnName == "SpatialAveragePooling")575{576layerParams.set("pool", "AVE");577layerParams.set("ave_pool_padded_area", scalarParams.has("count_include_pad") &&578scalarParams.get<bool>("count_include_pad"));579}580convertTorchKernelsParams(scalarParams, layerParams);581582CV_Assert(scalarParams.has("ceil_mode"));583layerParams.set("ceil_mode", 
scalarParams.get<bool>("ceil_mode"));584585curModule->modules.push_back(newModule);586}587else if (nnName == "Linear")588{589newModule->apiType = "InnerProduct";590readTorchTable(scalarParams, tensorParams);591592CV_Assert(tensorParams.count("weight"));593Mat weightBlob = tensorParams["weight"].second;594layerParams.blobs.push_back(weightBlob);595596bool bias = tensorParams.count("bias") != 0;597if (bias)598layerParams.blobs.push_back(tensorParams["bias"].second);599layerParams.set("bias_term", bias);600601layerParams.set("num_output", weightBlob.size[0]);602curModule->modules.push_back(newModule);603}604else if (nnName == "Reshape" || nnName == "View")605{606newModule->apiType = "Reshape";607608readTorchTable(scalarParams, tensorParams);609CV_Assert(scalarParams.has("size"));610611DictValue dimParam = scalarParams.get("size");612layerParams.set("dim", dimParam);613614int axis = (int)scalarParams.get<bool>("batchMode", true);615layerParams.set("axis", axis);616617curModule->modules.push_back(newModule);618}619else if (nnName == "ReLU")620{621curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));622readObject();623}624else if (nnName == "Tanh")625{626curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));627readObject();628}629else if (nnName == "Sigmoid")630{631curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));632readObject();633}634else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization" ||635nnName == "BatchNormalization")636{637newModule->apiType = "BatchNorm";638readTorchTable(scalarParams, tensorParams);639640CV_Assert(scalarParams.has("eps"));641float eps = float(scalarParams.get<double>("eps"));642layerParams.set("eps", eps);643644if (tensorParams.count("running_mean"))645{646layerParams.blobs.push_back(tensorParams["running_mean"].second);647}648else649{650CV_Assert(scalarParams.has("nOutput"));651layerParams.blobs.push_back(Mat::zeros(1, 
scalarParams.get<int>("nOutput"), CV_32F));652}653654if (tensorParams.count("running_var"))655{656layerParams.blobs.push_back(tensorParams["running_var"].second);657}658else if (tensorParams.count("running_std"))659{660layerParams.blobs.push_back(tensorParams["running_std"].second);661pow(layerParams.blobs.back(), -2, layerParams.blobs.back());662subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());663}664else665{666CV_Assert(scalarParams.has("nOutput"));667layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));668}669670if (tensorParams.count("weight"))671{672layerParams.set("has_weight", true);673layerParams.blobs.push_back(tensorParams["weight"].second);674}675676if (tensorParams.count("bias"))677{678layerParams.set("has_bias", true);679layerParams.blobs.push_back(tensorParams["bias"].second);680}681682if (nnName == "InstanceNormalization")683{684cv::Ptr<Module> mvnModule(new Module(nnName));685mvnModule->apiType = "MVN";686curModule->modules.push_back(mvnModule);687688layerParams.blobs[0].setTo(0); // batch norm's mean689layerParams.blobs[1].setTo(1); // batch norm's std690}691692curModule->modules.push_back(newModule);693}694else if (nnName == "PReLU")695{696readTorchTable(scalarParams, tensorParams);697698CV_Assert(tensorParams.count("weight"));699700size_t outputChannels = static_cast<int>(scalarParams.get<double>("nOutputPlane"));701if (outputChannels) {702703CV_Assert(tensorParams["weight"].second.total() == outputChannels);704layerParams.blobs.push_back(tensorParams["weight"].second);705706newModule->apiType = "ChannelsPReLU";707}708else {709CV_Assert(tensorParams["weight"].second.total() == 1);710float negative_slope = *tensorParams["weight"].second.ptr<float>();711layerParams.set("negative_slope", negative_slope);712713newModule->apiType = "ReLU";714}715716curModule->modules.push_back(newModule);717}718else if (nnName == "SpatialDropout" || nnName == "Dropout")719{720readTorchTable(scalarParams, 
tensorParams);721CV_Assert(scalarParams.has("p"));722723if (scalarParams.has("v2") && scalarParams.get<bool>("v2"))724{725newModule->apiType = "Identity";726}727else728{729float scale = 1 - scalarParams.get<double>("p");730731CV_Assert(scale > 0);732733newModule->apiType = "Power";734layerParams.set("scale", scale);735}736curModule->modules.push_back(newModule);737}738// TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style739// It's a loss function that has an Identity forward.740else if (nnName == "Identity" || nnName == "TotalVariation")741{742readTorchTable(scalarParams, tensorParams);743newModule->apiType = "Identity";744curModule->modules.push_back(newModule);745}746else if (nnName == "Normalize")747{748readTorchTable(scalarParams, tensorParams);749CV_Assert(scalarParams.has("p"));750751layerParams.set("p", scalarParams.get<float>("p"));752if (scalarParams.has("eps"))753layerParams.set("eps", scalarParams.get<float>("eps"));754755newModule->apiType = "Normalize";756curModule->modules.push_back(newModule);757}758else if (nnName == "Padding")759{760readTorchTable(scalarParams, tensorParams);761newModule->apiType = "Padding";762763CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));764if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)765CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");766767if (scalarParams.has("value"))768layerParams.set("value", scalarParams.get<float>("value"));769770if (scalarParams.has("nInputDim"))771layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));772773int dim = scalarParams.get<int>("dim") - 1; // In Lua we start from 1.774int pad = scalarParams.get<int>("pad");775776std::vector<int> paddings((dim + 1) * 2, 0);777if (pad > 0)778paddings[dim * 2 + 1] = pad; // Pad after (right).779else780paddings[dim * 2] = -pad; // Pad before (left).781layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], 
paddings.size()));782783curModule->modules.push_back(newModule);784}785else if (nnName == "CAddTable")786{787curModule->modules.push_back(newModule);788readObject();789}790else if (nnName == "SpatialDilatedConvolution")791{792readTorchTable(scalarParams, tensorParams);793newModule->apiType = "Convolution";794CV_Assert(scalarParams.has("padW") &&795scalarParams.has("padH")&&796scalarParams.has("dW")&&797scalarParams.has("dH")&&798scalarParams.has("dilationW")&&799scalarParams.has("dilationH")&&800scalarParams.has("kW")&&801scalarParams.has("kH")&&802scalarParams.has("nOutputPlane"));803804layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));805layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));806layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));807layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));808layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));809layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));810layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));811layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));812layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));813814layerParams.blobs.push_back(tensorParams["weight"].second);815816bool bias = tensorParams.count("bias");817layerParams.set("bias_term", bias);818if (bias)819layerParams.blobs.push_back(tensorParams["bias"].second);820821curModule->modules.push_back(newModule);822}823else if (nnName == "SpatialFullConvolution")824{825readTorchTable(scalarParams, tensorParams);826newModule->apiType = "Deconvolution";827CV_Assert(scalarParams.has("padW") 
&&828scalarParams.has("padH")&&829scalarParams.has("dW")&&830scalarParams.has("dH")&&831scalarParams.has("adjW")&&832scalarParams.has("adjH")&&833scalarParams.has("kW")&&834scalarParams.has("kH")&&835scalarParams.has("nOutputPlane"));836837layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));838layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));839layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));840layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));841layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));842layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));843layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));844layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));845layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));846847layerParams.blobs.push_back(tensorParams["weight"].second);848849bool bias = tensorParams.count("bias");850layerParams.set("bias_term", bias);851if (bias)852layerParams.blobs.push_back(tensorParams["bias"].second);853854curModule->modules.push_back(newModule);855}856else if (nnName == "SpatialMaxUnpooling")857{858readTorchTable(scalarParams, tensorParams);859CV_Assert(tensorParams.count("indices"));860861layerParams.set("indices_blob_id", tensorParams["indices"].first);862curModule->modules.push_back(newModule);863}864else if (nnName == "SoftMax")865{866newModule->apiType = "SoftMax";867curModule->modules.push_back(newModule);868}869else if (nnName == "LogSoftMax")870{871newModule->apiType = "SoftMax";872layerParams.set("log_softmax", true);873curModule->modules.push_back(newModule);874}875else if (nnName == "SpatialCrossMapLRN")876{877newModule->apiType = "LRN";878readTorchTable(scalarParams, 
tensorParams);879880CV_Assert(scalarParams.has("alpha"));881CV_Assert(scalarParams.has("beta"));882CV_Assert(scalarParams.has("k"));883CV_Assert(scalarParams.has("size"));884885layerParams.set("norm_region", "ACROSS_CHANNELS");886layerParams.set("alpha", scalarParams.get<float>("alpha"));887layerParams.set("beta", scalarParams.get<float>("beta"));888layerParams.set("bias", scalarParams.get<float>("k"));889layerParams.set("local_size", scalarParams.get<int>("size"));890layerParams.set("norm_by_size", true);891892curModule->modules.push_back(newModule);893}894else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")895{896readTorchTable(scalarParams, tensorParams);897898float power;899if (nnName == "Square") power = 2.0f;900else if (nnName == "Sqrt") power = 0.5f;901else if (nnName == "Power") power = scalarParams.get<float>("pow", 1.0f);902903newModule->apiType = "Power";904layerParams.set("power", power);905curModule->modules.push_back(newModule);906}907else if (nnName == "MulConstant")908{909readTorchTable(scalarParams, tensorParams);910CV_Assert(scalarParams.has("constant_scalar"));911newModule->apiType = "Power";912layerParams.set("scale", scalarParams.get<float>("constant_scalar"));913curModule->modules.push_back(newModule);914}915else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")916{917readTorchTable(scalarParams, tensorParams);918CV_Assert_N(scalarParams.has("pad_l"), scalarParams.has("pad_r"),919scalarParams.has("pad_t"), scalarParams.has("pad_b"));920int padTop = scalarParams.get<int>("pad_t");921int padLeft = scalarParams.get<int>("pad_l");922int padRight = scalarParams.get<int>("pad_r");923int padBottom = scalarParams.get<int>("pad_b");924if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)925CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");926927newModule->apiType = "Padding";928929// Torch's SpatialZeroPadding works with 3- or 4-dimensional input.930// So we 
add parameter input_dims=3 to ignore batch dimension if it will be.931std::vector<int> paddings(6, 0); // CHW932paddings[2] = padTop;933paddings[3] = padBottom;934paddings[4] = padLeft;935paddings[5] = padRight;936layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));937layerParams.set("input_dims", 3);938939if (nnName == "SpatialReflectionPadding")940layerParams.set("type", "reflect");941942curModule->modules.push_back(newModule);943}944else if (nnName == "ShaveImage")945{946// ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style947// It may be mapped to Slice layer.948readTorchTable(scalarParams, tensorParams);949CV_Assert(scalarParams.has("size"));950int size = scalarParams.get<int>("size");951952int begins[] = {0, 0, size, size};953int ends[] = {-1, -1, -size - 1, -size - 1};954955newModule->apiType = "Slice";956layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));957layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));958curModule->modules.push_back(newModule);959}960else if (nnName == "SpatialUpSamplingNearest")961{962readTorchTable(scalarParams, tensorParams);963CV_Assert(scalarParams.has("scale_factor"));964int scale_factor = scalarParams.get<int>("scale_factor");965newModule->apiType = "Resize";966layerParams.set("interpolation", "nearest");967layerParams.set("zoom_factor", scale_factor);968curModule->modules.push_back(newModule);969}970else971{972// Importer does not know how to map Torch's layer type to an OpenCV's one.973// However we parse all the parameters to let user create a custom layer.974readTorchTable(scalarParams, tensorParams);975for (std::map<String, DictValue>::const_iterator it = scalarParams.begin();976it != scalarParams.end(); ++it)977{978layerParams.set(it->first, it->second);979}980for (std::map<String, std::pair<int, Mat> >::iterator it = tensorParams.begin();981it != tensorParams.end(); 
++it)982{983layerParams.blobs.push_back(it->second.second);984}985newModule->apiType = nnName;986curModule->modules.push_back(newModule);987}988}989else990{991CV_Error(Error::StsNotImplemented, "Unsupported Torch class \"" + className + "\"");992}993994readedIndexes.insert(index);995}996997void readObject()998{999int typeidx = readInt();10001001if (typeidx == TYPE_TORCH)1002{1003int index = readInt();1004readTorchObject(index);1005readedIndexes.insert(index);1006}1007else if (typeidx == TYPE_NIL)1008return;1009else if (typeidx == TYPE_NUMBER)1010readDouble();1011else if (typeidx == TYPE_BOOLEAN)1012readBool();1013else if (typeidx == TYPE_STRING)1014readString();1015else if (typeidx == TYPE_TABLE)1016readTable();1017else1018CV_Error(Error::StsNotImplemented, "Unsupported Lua type");1019}10201021inline String generateLayerName(const String &label = String())1022{1023return "l" + toString(++this->moduleCounter) + "_" + label;1024}10251026int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)1027{1028if (module == NULL)1029return prevLayerId;10301031if (module->apiType.length())1032{1033int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);1034net.connect(prevLayerId, prevOutNum, newLayerId, 0);1035addedModules.push_back(std::make_pair(newLayerId, module));1036return newLayerId;1037}1038else1039{1040if (module->thName == "Sequential" || module->thName == "Inception")1041{1042for (size_t i = 0; i < module->modules.size(); i++)1043{1044prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);1045prevOutNum = 0;1046}1047return prevLayerId;1048}1049else if (module->thName == "Concat")1050{1051int newId, mergeId;1052LayerParams mergeParams;1053mergeParams.set("axis", module->params.get<int>("dimension") - 1);10541055std::vector<int> branchIds;1056for (int i = 0; i < (int)module->modules.size(); i++)1057{1058newId = fill(module->modules[i], 
addedModules, prevLayerId, prevOutNum);1059branchIds.push_back(newId);1060}10611062moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.1063mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);10641065for (int i = 0; i < branchIds.size(); i++)1066{1067net.connect(branchIds[i], 0, mergeId, i);1068}10691070addedModules.push_back(std::make_pair(mergeId, module));1071return mergeId;1072}1073else if (module->thName == "DepthConcat")1074{1075int newId, mergeId;1076LayerParams mergeParams;1077mergeParams.set("axis", module->params.get<int>("dimension") - 1);1078mergeParams.set("padding", true);10791080std::vector<int> branchIds;1081for (int i = 0; i < (int)module->modules.size(); i++)1082{1083newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);1084branchIds.push_back(newId);1085}10861087mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);10881089for (int i = 0; i < branchIds.size(); i++)1090{1091net.connect(branchIds[i], 0, mergeId, i);1092}10931094addedModules.push_back(std::make_pair(mergeId, module));1095return mergeId;1096}1097else if (module->thName == "Parallel")1098{1099int newId, splitId, mergeId, reshapeId;11001101LayerParams splitParams, mergeParams, reshapeParams;1102splitParams.set("axis", module->params.get<int>("inputDimension") - 1);1103mergeParams.set("axis", module->params.get<int>("outputDimension") - 1);1104reshapeParams.set("axis", splitParams.get<int>("axis"));1105reshapeParams.set("num_axes", 1);11061107splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);1108reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);1109net.connect(prevLayerId, prevOutNum, splitId, 0);11101111std::vector<int> branchIds;1112for (int i = 0; i < (int)module->modules.size(); i++)1113{1114net.connect(splitId, i, reshapeId, i);1115newId = fill(module->modules[i], addedModules, reshapeId, 
i);1116branchIds.push_back(newId);1117}11181119mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);11201121for (int i = 0; i < branchIds.size(); i++)1122{1123net.connect(branchIds[i], 0, mergeId, i);1124}11251126addedModules.push_back(std::make_pair(mergeId, module));1127return mergeId;1128}1129else if (module->thName == "ConcatTable") {1130int newId = -1;1131moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.1132for (int i = 0; i < (int)module->modules.size(); i++)1133{1134newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);1135}1136numUnconnectedLayers.push_back(module->modules.size());1137return newId;1138}1139else if (module->thName == "JoinTable") {1140std::vector<int> ids = net.getUnconnectedOutLayers();11411142int mergeId;1143LayerParams mergeParams;1144mergeParams.set("axis", module->params.get<int>("dimension") - 1);11451146mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);1147addedModules.push_back(std::make_pair(mergeId, module));11481149// Connect to the last number of unconnected layers.1150CV_Assert(!numUnconnectedLayers.empty());1151const int numInputs = numUnconnectedLayers.back();1152numUnconnectedLayers.pop_back();1153CV_Assert(numInputs <= ids.size());1154for (int i = 0; i < numInputs; i++)1155{1156net.connect(ids[ids.size() - numInputs + i], 0, mergeId, i);1157}11581159return mergeId;1160}1161else if (module->thName == "CAddTable") {1162String name = generateLayerName("torchCAddTable");1163std::vector<int> ids = net.getUnconnectedOutLayers();1164LayerParams params;1165params.set("operation", "sum");116611671168int id = net.addLayer(name, "Eltwise", params);11691170// Connect to the last number of unconnected layers.1171CV_Assert(!numUnconnectedLayers.empty());1172const int numInputs = numUnconnectedLayers.back();1173numUnconnectedLayers.pop_back();1174CV_Assert(numInputs <= ids.size());1175for (int i = 0; i < numInputs; 
i++)1176{1177net.connect(ids[ids.size() - numInputs + i], 0, id, i);1178}11791180addedModules.push_back(std::make_pair(id, module));1181return id;1182}1183else if (module->thName == "SpatialMaxUnpooling") {1184CV_Assert(module->params.has("indices_blob_id"));1185int indicesBlobId = module->params.get<int>("indices_blob_id");1186std::pair<int, Module*> poolingLayer;1187poolingLayer.first = -1;11881189for(int i = 0; i < addedModules.size(); i++)1190{1191if (addedModules[i].second->apiType == "Pooling" &&1192addedModules[i].second->params.has("indices_blob_id") &&1193addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)1194{1195poolingLayer = addedModules[i];1196break;1197}1198}11991200module->params.set("pool_k_h", poolingLayer.second->params.get<int>("kernel_h"));1201module->params.set("pool_k_w", poolingLayer.second->params.get<int>("kernel_w"));1202module->params.set("pool_stride_h", poolingLayer.second->params.get<int>("stride_h"));1203module->params.set("pool_stride_w", poolingLayer.second->params.get<int>("stride_w"));1204module->params.set("pool_pad_h", poolingLayer.second->params.get<int>("pad_h"));1205module->params.set("pool_pad_w", poolingLayer.second->params.get<int>("pad_w"));12061207String name = generateLayerName("torchMaxUnpooling");1208int id = net.addLayer(name, "MaxUnpool", module->params);1209net.connect(prevLayerId, 0, id, 0);12101211CV_Assert(poolingLayer.first != -1);1212net.connect(poolingLayer.first, 1, id, 1);12131214return id;1215}1216}12171218CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);1219return -1;1220}12211222void populateNet(Net net_)1223{1224CV_TRACE_FUNCTION();12251226CV_Assert(rootModule == NULL);1227cv::Ptr<Module> rootModule_ = cv::makePtr<Module>("Sequential");1228rootModule = rootModule_.get();1229curModule = rootModule;12301231THFile_seek(file, 0);1232readObject();12331234net = net_;1235std::vector<std::pair<int, Module*> > addedModules;1236fill(rootModule, 
addedModules);12371238rootModule = NULL;1239curModule = NULL;1240}1241};12421243Mat readTorchBlob(const String &filename, bool isBinary)1244{1245TorchImporter importer(filename, isBinary);1246importer.readObject();1247CV_Assert(importer.tensors.size() == 1);12481249return importer.tensors.begin()->second;1250}12511252Net readNetFromTorch(const String &model, bool isBinary)1253{1254CV_TRACE_FUNCTION();12551256TorchImporter importer(model, isBinary);1257Net net;1258importer.populateNet(net);1259return net;1260}12611262CV__DNN_INLINE_NS_END1263}} // namespace126412651266