Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/torch/torch_importer.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
15
//
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
18
//
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
21
//
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
25
//
26
// * The name of the copyright holders may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
28
//
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
39
//
40
//M*/
41
42
#include "../precomp.hpp"
43
#include <limits>
44
#include <set>
45
#include <map>
46
#include <algorithm>
47
#include <iostream>
48
#include <fstream>
49
50
#include "THDiskFile.h"
51
52
namespace cv {
53
namespace dnn {
54
CV__DNN_INLINE_NS_BEGIN
55
56
using namespace TH;
57
58
//#ifdef NDEBUG
59
static bool dbgPrint = false;
60
//#else
61
//static bool dbgPrint = true;
62
//#endif
63
64
// Type tags found in the serialized Torch object stream; readObject()
// dispatches on these values.
enum LuaType
{
    TYPE_NIL = 0,
    TYPE_NUMBER = 1,
    TYPE_STRING = 2,
    TYPE_TABLE = 3,
    TYPE_TORCH = 4,      // a Torch class instance (Tensor, Storage, nn module, ...)
    TYPE_BOOLEAN = 5,
    TYPE_FUNCTION = 6,
    TYPE_RECUR_FUNCTION = 8,
    LEGACY_TYPE_RECUR_FUNCTION = 7
};
76
77
// We use OpenCV's types to manage CV_ELEM_SIZE.
78
enum TorchType
{
    TYPE_DOUBLE = CV_64F,
    TYPE_FLOAT = CV_32F,
    TYPE_BYTE = CV_8U,
    TYPE_CHAR = CV_8S,
    TYPE_SHORT = CV_16S,
    TYPE_INT = CV_32S,
    // OpenCV has no 64-bit integer Mat type; CV_32SC2 is used only because its
    // element size is 8 bytes (matching int64) for CV_ELEM_SIZE arithmetic.
    // Actual long storages are materialized as CV_64F (see readTorchStorage).
    TYPE_LONG = CV_32SC2
};
88
89
template<typename T>
90
static String toString(const T &v)
91
{
92
std::ostringstream ss;
93
ss << v;
94
return ss.str();
95
}
96
97
static inline bool startsWith(const String &str, const char *substr)
98
{
99
return str.find(substr) == 0;
100
}
101
102
static inline bool endsWith(const String &str, const char *substr)
103
{
104
return str.rfind(substr) == str.length() - strlen(substr);
105
}
106
107
struct TorchImporter
108
{
109
// Maps a table field name to (serialization index, tensor data).
typedef std::map<String, std::pair<int, Mat> > TensorsMap;
Net net;  // the network being assembled from the parsed module tree

cv::Ptr<THFile> file;          // Torch serialized file being parsed (TH C API handle)
std::set<int> readedIndexes;   // object indexes already consumed; the stream may back-reference them
std::map<int, Mat> storages;   // Storage index -> raw element data
std::map<int, Mat> tensors;    // Tensor index -> blob converted to CV_32F
// Stack with numbers of unconnected layers per scope (Sequential, ConcatTable etc.)
std::vector<int> numUnconnectedLayers;
118
119
// One node of the parsed Torch module tree.
struct Module
{
    String thName, apiType;   // Torch class name (sans package prefix) and the mapped OpenCV layer type
    dnn::LayerParams params;  // parameters/blobs collected for the OpenCV layer
    std::vector<cv::Ptr<Module> > modules;  // children, for container modules

    Module(const String &_thName, const String &_apiType = String())
        : thName(_thName), apiType(_apiType) {}
};
128
129
Module *rootModule;  // top of the parsed module tree
Module *curModule;   // container currently receiving parsed child modules
int moduleCounter;   // monotonically increasing counter used to generate unique layer names
132
133
// Opens `filename` for reading and configures the TH file handle for
// binary or ascii Torch serialization according to `isBinary`.
TorchImporter(String filename, bool isBinary)
{
    CV_TRACE_FUNCTION();

    rootModule = curModule = NULL;
    moduleCounter = 0;

    // THFile_free is installed as the deleter for the C handle (RAII via cv::Ptr).
    file = cv::Ptr<THFile>(THDiskFile_new(filename, "r", 0), THFile_free);
    CV_Assert(file && THFile_isOpened(file));

    if (isBinary)
        THFile_binary(file);
    else
        THFile_ascii(file);
}
149
/* Simple readers */

// Thin wrappers over the TH file API, reading one scalar from `file` each.

inline int readInt()
{
    return THFile_readIntScalar(file);
}

inline long readLong()
{
    return THFile_readLongScalar(file);
}

inline bool readBool()
{
    // Booleans are serialized as integers in the Torch stream.
    return readInt() != 0;
}

inline double readDouble()
{
    return THFile_readDoubleScalar(file);
}
170
171
// Reads a length-prefixed string: an int character count followed by that
// many raw bytes.
inline String readString()
{
    int size = THFile_readIntScalar(file);
    String str(size, '\0');
    // Write into the string's own buffer via &str[0] — mutating the buffer
    // returned by c_str() through a const_cast (as before) is undefined
    // behavior. Guarded for size == 0, where &str[0] dereference is invalid
    // pre-C++11 and the read is a no-op anyway.
    if (size > 0)
        THFile_readCharRaw(file, &str[0], size);
    return str;
}
178
179
// Reads a Torch class-name record. The name may be preceded by a version
// marker string of the form "V <num>"; when present, the real class name is
// the following string.
inline String readTorchClassName()
{
    const String first = readString();
    if (startsWith(first, "V "))
        return readString();
    return first;
}
184
185
// Consumes a serialized Lua function record: a string payload followed by
// one object (presumably dumped code plus upvalues — nothing is retained).
inline void readFunction()
{
    readString();
    readObject();
}
190
191
void readTable(int index = -1)
192
{
193
index = (index < 0) ? readInt() : index;
194
195
if (readedIndexes.count(index))
196
return;
197
198
readedIndexes.insert(index);
199
200
int size = readInt();
201
202
for (int i = 0; i < size; i++)
203
{
204
readObject(); //key
205
readObject(); //value
206
}
207
}
208
209
/* Special readers */
210
211
static inline int parseTorchType(const String &str, const char *suffix, const char *prefix = "torch.")
212
{
213
if (startsWith(str, prefix) && endsWith(str, suffix))
214
{
215
String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));
216
217
if (typeStr == "Double")
218
return TYPE_DOUBLE;
219
else if (typeStr == "Float" || typeStr == "Cuda")
220
return TYPE_FLOAT;
221
else if (typeStr == "Byte")
222
return TYPE_BYTE;
223
else if (typeStr == "Char")
224
return TYPE_CHAR;
225
else if (typeStr == "Short")
226
return TYPE_SHORT;
227
else if (typeStr == "Int")
228
return TYPE_INT;
229
else if (typeStr == "Long")
230
return TYPE_LONG;
231
else
232
CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");
233
}
234
235
return -1;
236
}
237
238
// Element type of a "torch.<T>Tensor" class name, or -1 if not a tensor class.
static int parseTensorType(const String &className)
{
    return parseTorchType(className, "Tensor");
}

// Element type of a "torch.<T>Storage" class name, or -1 if not a storage class.
static int parseStorageType(const String &className)
{
    return parseTorchType(className, "Storage");
}
247
248
// Reads a torch.<T>Storage payload (element count followed by raw element
// data) and caches it in `storages` under the given serialization index.
void readTorchStorage(int index, int type = -1)
{
    long size = readLong();
    Mat storageMat;
    // NOTE(review): `size` is long but Mat::create takes int — a storage with
    // more than INT_MAX elements would truncate; confirm upstream limits.

    switch (type)
    {
    case TYPE_FLOAT:
        storageMat.create(1, size, CV_32F);
        THFile_readFloatRaw(file, (float*)storageMat.data, size);
        break;
    case TYPE_DOUBLE:
        storageMat.create(1, size, CV_64F);
        THFile_readDoubleRaw(file, (double*)storageMat.data, size);
        break;
    case TYPE_CHAR:
        storageMat.create(1, size, CV_8S);
        THFile_readByteRaw(file, (uchar*)storageMat.data, size);
        break;
    case TYPE_BYTE:
        storageMat.create(1, size, CV_8U);
        THFile_readByteRaw(file, (uchar*)storageMat.data, size);
        break;
    case TYPE_SHORT:
        storageMat.create(1, size, CV_16S);
        THFile_readShortRaw(file, (short*)storageMat.data, size);
        break;
    case TYPE_INT:
        storageMat.create(1, size, CV_32S);
        THFile_readIntRaw(file, (int*)storageMat.data, size);
        break;
    case TYPE_LONG:
    {
        storageMat.create(1, size, CV_64F); //handle LongStorage as CV_64F Mat
        double *buf = storageMat.ptr<double>();
        THFile_readLongRaw(file, (int64*)buf, size);

        // In-place int64 -> double widening: both element types are 8 bytes,
        // so each slot can be rewritten independently of the others.
        for (size_t i = (size_t)size; i-- > 0; )
            buf[i] = ((int64*)buf)[i];
        break;
    }
    default:
        CV_Error(Error::StsInternal, "");
        break;
    }

    storages.insert(std::make_pair(index, storageMat));
}
296
297
// Reads the Lua table attached to an nn module and splits its string-keyed
// fields into `scalarParams` (numbers / strings / booleans / storages cast to
// double arrays) and `tensorParams` (tensors, keyed by field name with their
// serialization index). Non-string keys and unsupported value types are
// parsed generically and discarded so the stream position stays consistent.
void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)
{
    int luaType = readInt();
    int index = readInt();

    CV_Assert(luaType == TYPE_TABLE && readedIndexes.count(index) == 0);
    readedIndexes.insert(index);

    long fpos;
    int numPairs = readInt();

    for (int i = 0; i < numPairs; i++)
    {
        fpos = THFile_position(file);
        int ktype = readInt();

        if (ktype != TYPE_STRING) //skip non-string fileds
        {
            // Rewind so readObject() sees the key's type tag again.
            THFile_seek(file, fpos);
            readObject(); //key
            readObject(); //value
            continue;
        }

        String key = readString();
        if (dbgPrint)
            std::cout << i << "th key: " << key << "\n";

        fpos = THFile_position(file);
        int vtype = readInt();

        if (vtype == TYPE_TORCH)
        {
            int index = readInt();  // NOTE(review): shadows the table's own `index` above
            int numModules = curModule->modules.size();
            readTorchObject(index);

            if (tensors.count(index)) //tensor was read
            {
                tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));
            }
            else if (storages.count(index)) //storage was read
            {
                // Storages become scalar arrays after conversion to double.
                Mat &matStorage = storages[index];
                Mat matCasted;
                matStorage.convertTo(matCasted, CV_64F);

                DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());
                scalarParams.set(key, scalar);
            }
            else
            {
                // Only tensors and scalars are supported for table fields.
                // i.e. nn.Inception has field `transfer` which is an
                // activation layer. So we remove added modules as readTorchObject(index).
                while (curModule->modules.size() > numModules)
                    curModule->modules.pop_back();
            }
        }
        else if (vtype == TYPE_NUMBER)
        {
            scalarParams.set(key, readDouble());
        }
        else if (vtype == TYPE_STRING)
        {
            scalarParams.set(key, readString());
        }
        else if (vtype == TYPE_BOOLEAN)
        {
            scalarParams.set(key, readBool());
        }
        else
        {
            // Unsupported value type: rewind past the type tag and let
            // readObject() consume the whole value.
            THFile_seek(file, fpos);
            readObject();
        }
    }

    //Debug output
    if (dbgPrint)
    {
        std::cout << "scalarParams:\n";
        std::cout << scalarParams;

        std::cout << "#" << tensorParams.size() << " tensorParams:\n";
        std::map<String,std::pair<int, Mat> >::const_iterator it;
        for (it = tensorParams.begin(); it != tensorParams.end(); it++)
            std::cout << it->first << ": Tensor " << it->second.second.size << "\n";
    }
}
387
388
// Reads a torch.<T>Tensor record: rank, sizes, strides, the 1-based storage
// offset, and the backing Storage (read on demand). Builds a strided Mat view
// over the storage memory, converts it to CV_32F, and caches the result in
// `tensors` under indexTensor.
void readTorchTensor(int indexTensor, int typeTensor)
{
    int ndims = readInt();
    AutoBuffer<int64, 4> sizes(ndims);
    AutoBuffer<int64, 4> steps(ndims);
    THFile_readLongRaw(file, sizes.data(), ndims);
    THFile_readLongRaw(file, steps.data(), ndims);
    long offset = readLong() - 1;  // stored offset is 1-based, hence the -1

    //read Storage
    int typeidx = readInt();
    CV_Assert(typeidx == TYPE_TORCH || (typeidx == TYPE_NIL && ndims == 0));

    if (typeidx == TYPE_NIL)
    {
        // Tensor without a storage: represent as an empty Mat.
        tensors.insert(std::make_pair(indexTensor, Mat()));
        return;
    }

    int indexStorage = readInt();
    if (readedIndexes.count(indexStorage) == 0)
    {
        String className = readTorchClassName();
        int typeStorage = parseStorageType(className);
        CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);
        readTorchStorage(indexStorage, typeStorage);
        // The materialized storage type may differ from the declared one
        // (LongStorage is stored as CV_64F), so re-query it.
        typeTensor = storages[indexStorage].type();
        readedIndexes.insert(indexStorage);
    }

    //small check
    // NOTE(review): assumes ndims >= 1 here — a TYPE_TORCH storage reference
    // with ndims == 0 would read sizes[0]/steps[0] out of bounds; confirm the
    // format never produces that combination.
    size_t requireElems = (size_t)offset + (size_t)steps[0] * (size_t)sizes[0];
    size_t storageElems = storages[indexStorage].total();
    if (requireElems > storageElems)
        CV_Error(Error::StsBadSize, "Storage has insufficient number of elements for requested Tensor");

    //convert sizes
    AutoBuffer<int, 4> isizes(ndims);
    AutoBuffer<size_t, 4> ssteps(ndims);
    for (int i = ndims - 1; i >= 0; i--)
    {
        isizes[i] = (int)sizes[i];
        ssteps[i] = (size_t)steps[i] * CV_ELEM_SIZE(typeTensor);  // element stride -> byte stride
    }

    //allocate Blob
    // Wrap the storage bytes without copying, then convert to float.
    Mat srcMat(ndims, isizes.data(), typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), ssteps.data());
    int dstType = CV_32F;

    Mat blob;
    srcMat.convertTo(blob, dstType);

    tensors.insert(std::make_pair(indexTensor, blob));
}
442
443
static bool isNNClass(const String &className, String &nnName)
444
{
445
const char *prefixes[] = {"nn.", "cunn.", "cudnn.", "fbcunn.", NULL};
446
447
for (int i = 0; prefixes[i]; i++)
448
{
449
if (startsWith(className, prefixes[i]))
450
{
451
nnName = className.substr(strlen(prefixes[i]));
452
return true;
453
}
454
}
455
456
return false;
457
}
458
459
static void convertTorchKernelsParams(const Dict &torchParams, cv::dnn::LayerParams &layerParams)
460
{
461
layerParams.set("kernel_h", torchParams.get<int>("kH"));
462
layerParams.set("kernel_w", torchParams.get<int>("kW"));
463
layerParams.set("stride_h", torchParams.get<int>("dH"));
464
layerParams.set("stride_w", torchParams.get<int>("dW"));
465
layerParams.set("pad_h", torchParams.get<int>("padH", 0));
466
layerParams.set("pad_w", torchParams.get<int>("padW", 0));
467
}
468
469
// Reads one serialized Torch object at the given serialization index and
// dispatches on its class: tensors and storages are cached, known nn.*
// module classes are converted to Module nodes with OpenCV layer parameters,
// and unknown nn.* classes fall through to a generic branch that preserves
// all scalar/tensor fields for a possible custom layer. Any other class is an
// error. Already-read indexes are skipped (the format back-references them).
void readTorchObject(int index)
{
    if(readedIndexes.count(index))
        return;

    String className = readTorchClassName();
    String nnName;

    if (dbgPrint)
        std::cout << "Class: " << className << std::endl;

    int type;
    if ( (type = parseTensorType(className)) >= 0 ) //is Tensor
    {
        readTorchTensor(index, type);
    }
    else if ( (type = parseStorageType(className)) >= 0 ) //is Storage
    {
        readTorchStorage(index, type);
    }
    else if (isNNClass(className, nnName))
    {
        Dict scalarParams;
        TensorsMap tensorParams;

        cv::Ptr<Module> newModule(new Module(nnName));
        cv::dnn::LayerParams &layerParams = newModule->params;

        layerParams.set("torch_index", index);

        if (nnName == "Sequential" || nnName == "Parallel" ||
            nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||
            nnName == "DepthConcat" || nnName == "Inception")
        {
            // Container modules: make this module the current parent while its
            // table (which triggers reads of the children) is consumed.
            Module *parentModule = curModule;
            curModule->modules.push_back(newModule);
            curModule = newModule;
            readTorchTable(scalarParams, tensorParams);
            curModule = parentModule;

            if (nnName == "Parallel")
            {
                layerParams.set("inputDimension", scalarParams.get<int>("inputDimension"));
                layerParams.set("outputDimension", scalarParams.get<int>("outputDimension"));
            }
            else if (nnName == "Concat" || nnName == "JoinTable" || nnName == "DepthConcat")
            {
                layerParams.set("dimension", scalarParams.get<int>("dimension"));
            }
        }
        else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")
        {
            newModule->apiType = "Convolution";
            readTorchTable(scalarParams, tensorParams);

            CV_Assert(tensorParams.count("weight"));
            layerParams.blobs.push_back(tensorParams["weight"].second);

            bool bias = tensorParams.count("bias") != 0;
            layerParams.set("bias_term", bias);
            if (bias)
                layerParams.blobs.push_back(tensorParams["bias"].second);

            layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));
            convertTorchKernelsParams(scalarParams, layerParams);

            if (nnName == "SpatialConvolutionMM")
            {
                // Split weights from a [ outCh x inCh*kH*kW ] 2D matrix
                // onto a 4D [ outCh x inCh x kH x kW ] blob.
                CV_Assert(layerParams.blobs[0].dims == 2);
                const int kernel = layerParams.blobs[0].size[1];  // inCh * kH * kW
                MatShape kernelShape(4);
                kernelShape[0] = layerParams.blobs[0].size[0];  // outCh.
                kernelShape[2] = layerParams.get<int>("kernel_h");
                kernelShape[3] = layerParams.get<int>("kernel_w");
                kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]);  // inCh.
                layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);
            }
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialLPPooling")
        {
            // nn.Sequential {
            //     [input -> (1) -> (2) -> output]
            //     (1): nn.Sequential {
            //       [input -> (1) -> (2) -> (3) -> (4) -> output]
            //       (1): nn.Power
            //       (2): nn.SpatialAveragePooling(...)
            //       (3): nn.MulConstant
            //       (4): nn.Power
            //     }
            //     (2): nn.Sigmoid
            // }
            // nn.SpatialLPPooling is just a table so we skip it.
            readTorchTable(scalarParams, tensorParams);
        }
        else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")
        {
            newModule->apiType = "Pooling";
            readTorchTable(scalarParams, tensorParams);

            if (nnName == "SpatialMaxPooling") {
                layerParams.set("pool", "MAX");
                layerParams.set("indices_blob_id", tensorParams["indices"].first);
            }
            if (nnName == "SpatialAveragePooling")
            {
                layerParams.set("pool", "AVE");
                layerParams.set("ave_pool_padded_area", scalarParams.has("count_include_pad") &&
                                scalarParams.get<bool>("count_include_pad"));
            }
            convertTorchKernelsParams(scalarParams, layerParams);

            CV_Assert(scalarParams.has("ceil_mode"));
            layerParams.set("ceil_mode", scalarParams.get<bool>("ceil_mode"));

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "Linear")
        {
            newModule->apiType = "InnerProduct";
            readTorchTable(scalarParams, tensorParams);

            CV_Assert(tensorParams.count("weight"));
            Mat weightBlob = tensorParams["weight"].second;
            layerParams.blobs.push_back(weightBlob);

            bool bias = tensorParams.count("bias") != 0;
            if (bias)
                layerParams.blobs.push_back(tensorParams["bias"].second);
            layerParams.set("bias_term", bias);

            layerParams.set("num_output", weightBlob.size[0]);
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "Reshape" || nnName == "View")
        {
            newModule->apiType = "Reshape";

            readTorchTable(scalarParams, tensorParams);
            CV_Assert(scalarParams.has("size"));

            DictValue dimParam = scalarParams.get("size");
            layerParams.set("dim", dimParam);

            // batchMode=true keeps the batch dimension intact (axis 1).
            int axis = (int)scalarParams.get<bool>("batchMode", true);
            layerParams.set("axis", axis);

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "ReLU")
        {
            curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));
            readObject();
        }
        else if (nnName == "Tanh")
        {
            curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));
            readObject();
        }
        else if (nnName == "Sigmoid")
        {
            curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
            readObject();
        }
        else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization" ||
                 nnName == "BatchNormalization")
        {
            newModule->apiType = "BatchNorm";
            readTorchTable(scalarParams, tensorParams);

            CV_Assert(scalarParams.has("eps"));
            float eps = float(scalarParams.get<double>("eps"));
            layerParams.set("eps", eps);

            // blobs[0]: mean (zeros when not trained with running stats).
            if (tensorParams.count("running_mean"))
            {
                layerParams.blobs.push_back(tensorParams["running_mean"].second);
            }
            else
            {
                CV_Assert(scalarParams.has("nOutput"));
                layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
            }

            // blobs[1]: variance. Older models store running_std = 1/sqrt(var + eps),
            // recovered here as std^-2 - eps.
            if (tensorParams.count("running_var"))
            {
                layerParams.blobs.push_back(tensorParams["running_var"].second);
            }
            else if (tensorParams.count("running_std"))
            {
                layerParams.blobs.push_back(tensorParams["running_std"].second);
                pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
                subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
            }
            else
            {
                CV_Assert(scalarParams.has("nOutput"));
                layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
            }

            if (tensorParams.count("weight"))
            {
                layerParams.set("has_weight", true);
                layerParams.blobs.push_back(tensorParams["weight"].second);
            }

            if (tensorParams.count("bias"))
            {
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(tensorParams["bias"].second);
            }

            if (nnName == "InstanceNormalization")
            {
                // Instance norm = MVN (per-instance normalization) followed by
                // a BatchNorm with neutral running statistics.
                cv::Ptr<Module> mvnModule(new Module(nnName));
                mvnModule->apiType = "MVN";
                curModule->modules.push_back(mvnModule);

                layerParams.blobs[0].setTo(0);  // batch norm's mean
                layerParams.blobs[1].setTo(1);  // batch norm's std
            }

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "PReLU")
        {
            readTorchTable(scalarParams, tensorParams);

            CV_Assert(tensorParams.count("weight"));

            // nOutputPlane == 0 means a single shared slope -> plain ReLU layer.
            size_t outputChannels = static_cast<int>(scalarParams.get<double>("nOutputPlane"));
            if (outputChannels) {

                CV_Assert(tensorParams["weight"].second.total() == outputChannels);
                layerParams.blobs.push_back(tensorParams["weight"].second);

                newModule->apiType = "ChannelsPReLU";
            }
            else {
                CV_Assert(tensorParams["weight"].second.total() == 1);
                float negative_slope = *tensorParams["weight"].second.ptr<float>();
                layerParams.set("negative_slope", negative_slope);

                newModule->apiType = "ReLU";
            }

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialDropout" || nnName == "Dropout")
        {
            readTorchTable(scalarParams, tensorParams);
            CV_Assert(scalarParams.has("p"));

            // v2 dropout rescales at train time, so inference is identity;
            // legacy dropout must scale activations by (1 - p).
            if (scalarParams.has("v2") && scalarParams.get<bool>("v2"))
            {
                newModule->apiType = "Identity";
            }
            else
            {
                float scale = 1 - scalarParams.get<double>("p");

                CV_Assert(scale > 0);

                newModule->apiType = "Power";
                layerParams.set("scale", scale);
            }
            curModule->modules.push_back(newModule);
        }
        // TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
        // It's a loss function that has an Identity forward.
        else if (nnName == "Identity" || nnName == "TotalVariation")
        {
            readTorchTable(scalarParams, tensorParams);
            newModule->apiType = "Identity";
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "Normalize")
        {
            readTorchTable(scalarParams, tensorParams);
            CV_Assert(scalarParams.has("p"));

            layerParams.set("p", scalarParams.get<float>("p"));
            if (scalarParams.has("eps"))
                layerParams.set("eps", scalarParams.get<float>("eps"));

            newModule->apiType = "Normalize";
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "Padding")
        {
            readTorchTable(scalarParams, tensorParams);
            newModule->apiType = "Padding";

            CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));
            if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)
                CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");

            if (scalarParams.has("value"))
                layerParams.set("value", scalarParams.get<float>("value"));

            if (scalarParams.has("nInputDim"))
                layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));

            int dim = scalarParams.get<int>("dim") - 1;  // In Lua we start from 1.
            int pad = scalarParams.get<int>("pad");

            // paddings is a flat (before, after) pair per dimension up to `dim`.
            std::vector<int> paddings((dim + 1) * 2, 0);
            if (pad > 0)
                paddings[dim * 2 + 1] = pad;  // Pad after (right).
            else
                paddings[dim * 2] = -pad;  // Pad before (left).
            layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "CAddTable")
        {
            curModule->modules.push_back(newModule);
            readObject();
        }
        else if (nnName == "SpatialDilatedConvolution")
        {
            readTorchTable(scalarParams, tensorParams);
            newModule->apiType = "Convolution";
            CV_Assert(scalarParams.has("padW") &&
                      scalarParams.has("padH")&&
                      scalarParams.has("dW")&&
                      scalarParams.has("dH")&&
                      scalarParams.has("dilationW")&&
                      scalarParams.has("dilationH")&&
                      scalarParams.has("kW")&&
                      scalarParams.has("kH")&&
                      scalarParams.has("nOutputPlane"));

            layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
            layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
            layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
            layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
            layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
            layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
            layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));
            layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));
            layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));

            layerParams.blobs.push_back(tensorParams["weight"].second);

            bool bias = tensorParams.count("bias");
            layerParams.set("bias_term", bias);
            if (bias)
                layerParams.blobs.push_back(tensorParams["bias"].second);

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialFullConvolution")
        {
            readTorchTable(scalarParams, tensorParams);
            newModule->apiType = "Deconvolution";
            CV_Assert(scalarParams.has("padW") &&
                      scalarParams.has("padH")&&
                      scalarParams.has("dW")&&
                      scalarParams.has("dH")&&
                      scalarParams.has("adjW")&&
                      scalarParams.has("adjH")&&
                      scalarParams.has("kW")&&
                      scalarParams.has("kH")&&
                      scalarParams.has("nOutputPlane"));

            layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
            layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
            layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
            layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
            layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
            layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
            layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));
            layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
            layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));

            layerParams.blobs.push_back(tensorParams["weight"].second);

            bool bias = tensorParams.count("bias");
            layerParams.set("bias_term", bias);
            if (bias)
                layerParams.blobs.push_back(tensorParams["bias"].second);

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialMaxUnpooling")
        {
            readTorchTable(scalarParams, tensorParams);
            CV_Assert(tensorParams.count("indices"));

            // Linked to the matching max-pool layer via the shared indices blob id.
            layerParams.set("indices_blob_id", tensorParams["indices"].first);
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SoftMax")
        {
            newModule->apiType = "SoftMax";
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "LogSoftMax")
        {
            newModule->apiType = "SoftMax";
            layerParams.set("log_softmax", true);
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialCrossMapLRN")
        {
            newModule->apiType = "LRN";
            readTorchTable(scalarParams, tensorParams);

            CV_Assert(scalarParams.has("alpha"));
            CV_Assert(scalarParams.has("beta"));
            CV_Assert(scalarParams.has("k"));
            CV_Assert(scalarParams.has("size"));

            layerParams.set("norm_region", "ACROSS_CHANNELS");
            layerParams.set("alpha", scalarParams.get<float>("alpha"));
            layerParams.set("beta", scalarParams.get<float>("beta"));
            layerParams.set("bias", scalarParams.get<float>("k"));
            layerParams.set("local_size", scalarParams.get<int>("size"));
            layerParams.set("norm_by_size", true);

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")
        {
            readTorchTable(scalarParams, tensorParams);

            // The branch condition guarantees one of the three cases assigns `power`.
            float power;
            if (nnName == "Square") power = 2.0f;
            else if (nnName == "Sqrt") power = 0.5f;
            else if (nnName == "Power") power = scalarParams.get<float>("pow", 1.0f);

            newModule->apiType = "Power";
            layerParams.set("power", power);
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "MulConstant")
        {
            readTorchTable(scalarParams, tensorParams);
            CV_Assert(scalarParams.has("constant_scalar"));
            newModule->apiType = "Power";
            layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
        {
            readTorchTable(scalarParams, tensorParams);
            CV_Assert_N(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
                        scalarParams.has("pad_t"), scalarParams.has("pad_b"));
            int padTop = scalarParams.get<int>("pad_t");
            int padLeft = scalarParams.get<int>("pad_l");
            int padRight = scalarParams.get<int>("pad_r");
            int padBottom = scalarParams.get<int>("pad_b");
            if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)
                CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");

            newModule->apiType = "Padding";

            // Torch's SpatialZeroPadding works with 3- or 4-dimensional input.
            // So we add parameter input_dims=3 to ignore batch dimension if it will be.
            std::vector<int> paddings(6, 0); // CHW
            paddings[2] = padTop;
            paddings[3] = padBottom;
            paddings[4] = padLeft;
            paddings[5] = padRight;
            layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
            layerParams.set("input_dims", 3);

            if (nnName == "SpatialReflectionPadding")
                layerParams.set("type", "reflect");

            curModule->modules.push_back(newModule);
        }
        else if (nnName == "ShaveImage")
        {
            // ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
            // It may be mapped to Slice layer.
            readTorchTable(scalarParams, tensorParams);
            CV_Assert(scalarParams.has("size"));
            int size = scalarParams.get<int>("size");

            // Crop `size` pixels from each spatial border; -1 keeps N and C whole.
            int begins[] = {0, 0, size, size};
            int ends[] = {-1, -1, -size - 1, -size - 1};

            newModule->apiType = "Slice";
            layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
            layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
            curModule->modules.push_back(newModule);
        }
        else if (nnName == "SpatialUpSamplingNearest")
        {
            readTorchTable(scalarParams, tensorParams);
            CV_Assert(scalarParams.has("scale_factor"));
            int scale_factor = scalarParams.get<int>("scale_factor");
            newModule->apiType = "Resize";
            layerParams.set("interpolation", "nearest");
            layerParams.set("zoom_factor", scale_factor);
            curModule->modules.push_back(newModule);
        }
        else
        {
            // Importer does not know how to map Torch's layer type to an OpenCV's one.
            // However we parse all the parameters to let user create a custom layer.
            readTorchTable(scalarParams, tensorParams);
            for (std::map<String, DictValue>::const_iterator it = scalarParams.begin();
                 it != scalarParams.end(); ++it)
            {
                layerParams.set(it->first, it->second);
            }
            for (std::map<String, std::pair<int, Mat> >::iterator it = tensorParams.begin();
                 it != tensorParams.end(); ++it)
            {
                layerParams.blobs.push_back(it->second.second);
            }
            newModule->apiType = nnName;
            curModule->modules.push_back(newModule);
        }
    }
    else
    {
        CV_Error(Error::StsNotImplemented, "Unsupported Torch class \"" + className + "\"");
    }

    readedIndexes.insert(index);
}
997
998
void readObject()
999
{
1000
int typeidx = readInt();
1001
1002
if (typeidx == TYPE_TORCH)
1003
{
1004
int index = readInt();
1005
readTorchObject(index);
1006
readedIndexes.insert(index);
1007
}
1008
else if (typeidx == TYPE_NIL)
1009
return;
1010
else if (typeidx == TYPE_NUMBER)
1011
readDouble();
1012
else if (typeidx == TYPE_BOOLEAN)
1013
readBool();
1014
else if (typeidx == TYPE_STRING)
1015
readString();
1016
else if (typeidx == TYPE_TABLE)
1017
readTable();
1018
else
1019
CV_Error(Error::StsNotImplemented, "Unsupported Lua type");
1020
}
1021
1022
// Builds a unique layer name of the form "l<counter>_<label>", bumping the
// per-importer module counter on every call.
inline String generateLayerName(const String &label = String())
{
    const String prefix = "l" + toString(++this->moduleCounter);
    return prefix + "_" + label;
}
1026
1027
/*
 * Recursively converts the parsed Torch module tree into dnn::Net layers.
 *
 * @param module        current node of the tree (NULL is a no-op)
 * @param addedModules  output list of (layerId, module) pairs for every layer added
 * @param prevLayerId   id of the net layer feeding this subtree
 * @param prevOutNum    output port of that layer to connect from
 * @return id of the subtree's output layer
 *
 * Raises StsInternal for a container type the importer does not understand.
 */
int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)
{
    if (module == NULL)
        return prevLayerId;

    if (module->apiType.length())
    {
        // Leaf: this Torch layer was already mapped to an OpenCV layer type.
        int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);
        net.connect(prevLayerId, prevOutNum, newLayerId, 0);
        addedModules.push_back(std::make_pair(newLayerId, module));
        return newLayerId;
    }
    else
    {
        if (module->thName == "Sequential" || module->thName == "Inception")
        {
            // Chain children one after another; only the first child consumes prevOutNum.
            for (size_t i = 0; i < module->modules.size(); i++)
            {
                prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                prevOutNum = 0;
            }
            return prevLayerId;
        }
        else if (module->thName == "Concat")
        {
            int newId, mergeId;
            LayerParams mergeParams;
            // Torch dimensions are 1-based; dnn axes are 0-based.
            mergeParams.set("axis", module->params.get<int>("dimension") - 1);

            // Run every branch on the same input, then concat their outputs.
            std::vector<int> branchIds;
            for (size_t i = 0; i < module->modules.size(); i++)
            {
                newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                branchIds.push_back(newId);
            }

            moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
            mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);

            for (size_t i = 0; i < branchIds.size(); i++)
            {
                net.connect(branchIds[i], 0, mergeId, (int)i);
            }

            addedModules.push_back(std::make_pair(mergeId, module));
            return mergeId;
        }
        else if (module->thName == "DepthConcat")
        {
            // Like Concat, but the Concat layer pads smaller inputs ("padding": true).
            int newId, mergeId;
            LayerParams mergeParams;
            mergeParams.set("axis", module->params.get<int>("dimension") - 1);
            mergeParams.set("padding", true);

            std::vector<int> branchIds;
            for (size_t i = 0; i < module->modules.size(); i++)
            {
                newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
                branchIds.push_back(newId);
            }

            mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);

            for (size_t i = 0; i < branchIds.size(); i++)
            {
                net.connect(branchIds[i], 0, mergeId, (int)i);
            }

            addedModules.push_back(std::make_pair(mergeId, module));
            return mergeId;
        }
        else if (module->thName == "Parallel")
        {
            // Slice the input along inputDimension, feed slice i (reshaped to
            // drop the sliced axis) to branch i, then concat along outputDimension.
            int newId, splitId, mergeId, reshapeId;

            LayerParams splitParams, mergeParams, reshapeParams;
            splitParams.set("axis", module->params.get<int>("inputDimension") - 1);
            mergeParams.set("axis", module->params.get<int>("outputDimension") - 1);
            reshapeParams.set("axis", splitParams.get<int>("axis"));
            reshapeParams.set("num_axes", 1);

            splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);
            reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);
            net.connect(prevLayerId, prevOutNum, splitId, 0);

            std::vector<int> branchIds;
            for (int i = 0; i < (int)module->modules.size(); i++)
            {
                net.connect(splitId, i, reshapeId, i);
                newId = fill(module->modules[i], addedModules, reshapeId, i);
                branchIds.push_back(newId);
            }

            mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);

            for (size_t i = 0; i < branchIds.size(); i++)
            {
                net.connect(branchIds[i], 0, mergeId, (int)i);
            }

            addedModules.push_back(std::make_pair(mergeId, module));
            return mergeId;
        }
        else if (module->thName == "ConcatTable") {
            int newId = -1;
            moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.
            for (size_t i = 0; i < module->modules.size(); i++)
            {
                newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
            }
            // Remember the branch count so the matching JoinTable/CAddTable
            // knows how many unconnected outputs to merge.
            numUnconnectedLayers.push_back((int)module->modules.size());
            return newId;
        }
        else if (module->thName == "JoinTable") {
            std::vector<int> ids = net.getUnconnectedOutLayers();

            int mergeId;
            LayerParams mergeParams;
            mergeParams.set("axis", module->params.get<int>("dimension") - 1);

            mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
            addedModules.push_back(std::make_pair(mergeId, module));

            // Connect to the last number of unconnected layers.
            CV_Assert(!numUnconnectedLayers.empty());
            const int numInputs = numUnconnectedLayers.back();
            numUnconnectedLayers.pop_back();
            CV_Assert(numInputs <= (int)ids.size());
            for (int i = 0; i < numInputs; i++)
            {
                net.connect(ids[ids.size() - numInputs + i], 0, mergeId, i);
            }

            return mergeId;
        }
        else if (module->thName == "CAddTable") {
            // Element-wise sum over the outputs of the preceding ConcatTable.
            String name = generateLayerName("torchCAddTable");
            std::vector<int> ids = net.getUnconnectedOutLayers();
            LayerParams params;
            params.set("operation", "sum");

            int id = net.addLayer(name, "Eltwise", params);

            // Connect to the last number of unconnected layers.
            CV_Assert(!numUnconnectedLayers.empty());
            const int numInputs = numUnconnectedLayers.back();
            numUnconnectedLayers.pop_back();
            CV_Assert(numInputs <= (int)ids.size());
            for (int i = 0; i < numInputs; i++)
            {
                net.connect(ids[ids.size() - numInputs + i], 0, id, i);
            }

            addedModules.push_back(std::make_pair(id, module));
            return id;
        }
        else if (module->thName == "SpatialMaxUnpooling") {
            // Pair this unpooling layer with the pooling layer that produced
            // its indices blob, identified by a shared "indices_blob_id".
            CV_Assert(module->params.has("indices_blob_id"));
            int indicesBlobId = module->params.get<int>("indices_blob_id");
            std::pair<int, Module*> poolingLayer(-1, (Module*)NULL);

            for (size_t i = 0; i < addedModules.size(); i++)
            {
                if (addedModules[i].second->apiType == "Pooling" &&
                    addedModules[i].second->params.has("indices_blob_id") &&
                    addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)
                {
                    poolingLayer = addedModules[i];
                    break;
                }
            }

            // BUGFIX: validate that a paired pooling layer was found *before*
            // dereferencing poolingLayer.second. Previously the assert ran
            // only after six dereferences of an uninitialized pointer.
            CV_Assert(poolingLayer.first != -1 && poolingLayer.second != NULL);

            // Propagate the pooling geometry so MaxUnpool can invert it.
            module->params.set("pool_k_h", poolingLayer.second->params.get<int>("kernel_h"));
            module->params.set("pool_k_w", poolingLayer.second->params.get<int>("kernel_w"));
            module->params.set("pool_stride_h", poolingLayer.second->params.get<int>("stride_h"));
            module->params.set("pool_stride_w", poolingLayer.second->params.get<int>("stride_w"));
            module->params.set("pool_pad_h", poolingLayer.second->params.get<int>("pad_h"));
            module->params.set("pool_pad_w", poolingLayer.second->params.get<int>("pad_w"));

            String name = generateLayerName("torchMaxUnpooling");
            int id = net.addLayer(name, "MaxUnpool", module->params);
            net.connect(prevLayerId, 0, id, 0);

            // Second input is the pooling layer's indices output (port 1).
            net.connect(poolingLayer.first, 1, id, 1);

            return id;
        }
    }

    CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);
    return -1;
}
1222
1223
// Parses the whole Torch file into a module tree rooted at an implicit
// Sequential container, then converts that tree into layers of net_.
void populateNet(Net net_)
{
    CV_TRACE_FUNCTION();

    // populateNet must not be re-entered while a parse is in progress.
    CV_Assert(rootModule == NULL);
    cv::Ptr<Module> root = cv::makePtr<Module>("Sequential");
    rootModule = root.get();
    curModule = rootModule;

    // Parse from the beginning of the file.
    THFile_seek(file, 0);
    readObject();

    net = net_;
    std::vector<std::pair<int, Module*> > addedModules;
    fill(rootModule, addedModules);

    // Reset parser state; the tree itself is released with `root`.
    curModule = NULL;
    rootModule = NULL;
}
1242
};
1243
1244
// Deserializes a single Torch-serialized tensor from the given file and
// returns it as a Mat.
Mat readTorchBlob(const String &filename, bool isBinary)
{
    TorchImporter importer(filename, isBinary);
    importer.readObject();

    // The file must contain exactly one tensor.
    CV_Assert(importer.tensors.size() == 1);
    return importer.tensors.begin()->second;
}
1252
1253
// Reads a Torch7-serialized network from `model` and builds a dnn::Net.
Net readNetFromTorch(const String &model, bool isBinary)
{
    CV_TRACE_FUNCTION();

    Net net;
    TorchImporter importer(model, isBinary);
    importer.populateNet(net);
    return net;
}
1262
1263
CV__DNN_INLINE_NS_END
1264
}} // namespace
1265
1266