// Path: modules/dnn/src/torch/torch_importer.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if
advised of the possibility of such damage.38//39//M*/4041#include "../precomp.hpp"42#include <limits>43#include <set>44#include <map>45#include <algorithm>46#include <iostream>47#include <fstream>4849#include "THDiskFile.h"5051namespace cv {52namespace dnn {53CV__DNN_INLINE_NS_BEGIN5455using namespace TH;5657//#ifdef NDEBUG58static bool dbgPrint = false;59//#else60//static bool dbgPrint = true;61//#endif6263enum LuaType64{65TYPE_NIL = 0,66TYPE_NUMBER = 1,67TYPE_STRING = 2,68TYPE_TABLE = 3,69TYPE_TORCH = 4,70TYPE_BOOLEAN = 5,71TYPE_FUNCTION = 6,72TYPE_RECUR_FUNCTION = 8,73LEGACY_TYPE_RECUR_FUNCTION = 774};7576// We use OpenCV's types to manage CV_ELEM_SIZE.77enum TorchType78{79TYPE_DOUBLE = CV_64F,80TYPE_FLOAT = CV_32F,81TYPE_BYTE = CV_8U,82TYPE_CHAR = CV_8S,83TYPE_SHORT = CV_16S,84TYPE_INT = CV_32S,85TYPE_LONG = CV_32SC286};8788template<typename T>89static String toString(const T &v)90{91std::ostringstream ss;92ss << v;93return ss.str();94}9596static inline bool startsWith(const String &str, const char *substr)97{98return str.find(substr) == 0;99}100101static inline bool endsWith(const String &str, const char *substr)102{103return str.rfind(substr) == str.length() - strlen(substr);104}105106struct TorchImporter107{108typedef std::map<String, std::pair<int, Mat> > TensorsMap;109Net net;110111cv::Ptr<THFile> file;112std::set<int> readedIndexes;113std::map<int, Mat> storages;114std::map<int, Mat> tensors;115// Stack with numbers of unconnected layers per scope (Sequential, ConcatTable etc.)116std::vector<int> numUnconnectedLayers;117118struct Module119{120String thName, apiType;121dnn::LayerParams params;122std::vector<cv::Ptr<Module> > modules;123124Module(const String &_thName, const String &_apiType = String())125: thName(_thName), apiType(_apiType) {}126};127128Module *rootModule;129Module *curModule;130int moduleCounter;131132TorchImporter(String filename, bool isBinary)133{134CV_TRACE_FUNCTION();135136rootModule = curModule = NULL;137moduleCounter = 0;138139file 
= cv::Ptr<THFile>(THDiskFile_new(filename, "r", 0), THFile_free);140CV_Assert(file && THFile_isOpened(file));141142if (isBinary)143THFile_binary(file);144else145THFile_ascii(file);146}147148/* Simple readers */149150inline int readInt()151{152return THFile_readIntScalar(file);153}154155inline long readLong()156{157return THFile_readLongScalar(file);158}159160inline bool readBool()161{162return readInt() != 0;163}164165inline double readDouble()166{167return THFile_readDoubleScalar(file);168}169170inline String readString()171{172int size = THFile_readIntScalar(file);173String str(size, '\0');174THFile_readCharRaw(file, const_cast<char*>(str.c_str()), size);175return str;176}177178inline String readTorchClassName()179{180String version = readString();181return startsWith(version, "V ") ? readString() : version;182}183184inline void readFunction()185{186readString();187readObject();188}189190void readTable(int index = -1)191{192index = (index < 0) ? readInt() : index;193194if (readedIndexes.count(index))195return;196197readedIndexes.insert(index);198199int size = readInt();200201for (int i = 0; i < size; i++)202{203readObject(); //key204readObject(); //value205}206}207208/* Special readers */209210static inline int parseTorchType(const String &str, const char *suffix, const char *prefix = "torch.")211{212if (startsWith(str, prefix) && endsWith(str, suffix))213{214String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));215216if (typeStr == "Double")217return TYPE_DOUBLE;218else if (typeStr == "Float" || typeStr == "Cuda")219return TYPE_FLOAT;220else if (typeStr == "Byte")221return TYPE_BYTE;222else if (typeStr == "Char")223return TYPE_CHAR;224else if (typeStr == "Short")225return TYPE_SHORT;226else if (typeStr == "Int")227return TYPE_INT;228else if (typeStr == "Long")229return TYPE_LONG;230else231CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");232}233234return -1;235}236237static 
int parseTensorType(const String &className)238{239return parseTorchType(className, "Tensor");240}241242static int parseStorageType(const String &className)243{244return parseTorchType(className, "Storage");245}246247void readTorchStorage(int index, int type = -1)248{249long size = readLong();250Mat storageMat;251252switch (type)253{254case TYPE_FLOAT:255storageMat.create(1, size, CV_32F);256THFile_readFloatRaw(file, (float*)storageMat.data, size);257break;258case TYPE_DOUBLE:259storageMat.create(1, size, CV_64F);260THFile_readDoubleRaw(file, (double*)storageMat.data, size);261break;262case TYPE_CHAR:263storageMat.create(1, size, CV_8S);264THFile_readByteRaw(file, (uchar*)storageMat.data, size);265break;266case TYPE_BYTE:267storageMat.create(1, size, CV_8U);268THFile_readByteRaw(file, (uchar*)storageMat.data, size);269break;270case TYPE_SHORT:271storageMat.create(1, size, CV_16S);272THFile_readShortRaw(file, (short*)storageMat.data, size);273break;274case TYPE_INT:275storageMat.create(1, size, CV_32S);276THFile_readIntRaw(file, (int*)storageMat.data, size);277break;278case TYPE_LONG:279{280storageMat.create(1, size, CV_64F); //handle LongStorage as CV_64F Mat281double *buf = storageMat.ptr<double>();282THFile_readLongRaw(file, (int64*)buf, size);283284for (size_t i = (size_t)size; i-- > 0; )285buf[i] = ((int64*)buf)[i];286break;287}288default:289CV_Error(Error::StsInternal, "");290break;291}292293storages.insert(std::make_pair(index, storageMat));294}295296void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)297{298int luaType = readInt();299int index = readInt();300301CV_Assert(luaType == TYPE_TABLE && readedIndexes.count(index) == 0);302readedIndexes.insert(index);303304long fpos;305int numPairs = readInt();306307for (int i = 0; i < numPairs; i++)308{309fpos = THFile_position(file);310int ktype = readInt();311312if (ktype != TYPE_STRING) //skip non-string fileds313{314THFile_seek(file, fpos);315readObject(); //key316readObject(); 
//value317continue;318}319320String key = readString();321if (dbgPrint)322std::cout << i << "th key: " << key << "\n";323324fpos = THFile_position(file);325int vtype = readInt();326327if (vtype == TYPE_TORCH)328{329int index = readInt();330int numModules = curModule->modules.size();331readTorchObject(index);332333if (tensors.count(index)) //tensor was read334{335tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));336}337else if (storages.count(index)) //storage was read338{339Mat &matStorage = storages[index];340Mat matCasted;341matStorage.convertTo(matCasted, CV_64F);342343DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());344scalarParams.set(key, scalar);345}346else347{348// Only tensors and scalars are supported for table fields.349// i.e. nn.Inception has field `transfer` which is an350// activation layer. So we remove added modules as readTorchObject(index).351while (curModule->modules.size() > numModules)352curModule->modules.pop_back();353}354}355else if (vtype == TYPE_NUMBER)356{357scalarParams.set(key, readDouble());358}359else if (vtype == TYPE_STRING)360{361scalarParams.set(key, readString());362}363else if (vtype == TYPE_BOOLEAN)364{365scalarParams.set(key, readBool());366}367else368{369THFile_seek(file, fpos);370readObject();371}372}373374//Debug output375if (dbgPrint)376{377std::cout << "scalarParams:\n";378std::cout << scalarParams;379380std::cout << "#" << tensorParams.size() << " tensorParams:\n";381std::map<String,std::pair<int, Mat> >::const_iterator it;382for (it = tensorParams.begin(); it != tensorParams.end(); it++)383std::cout << it->first << ": Tensor " << it->second.second.size << "\n";384}385}386387void readTorchTensor(int indexTensor, int typeTensor)388{389int ndims = readInt();390AutoBuffer<int64, 4> sizes(ndims);391AutoBuffer<int64, 4> steps(ndims);392THFile_readLongRaw(file, sizes.data(), ndims);393THFile_readLongRaw(file, steps.data(), ndims);394long offset = readLong() - 
1;395396//read Storage397int typeidx = readInt();398CV_Assert(typeidx == TYPE_TORCH || (typeidx == TYPE_NIL && ndims == 0));399400if (typeidx == TYPE_NIL)401{402tensors.insert(std::make_pair(indexTensor, Mat()));403return;404}405406int indexStorage = readInt();407if (readedIndexes.count(indexStorage) == 0)408{409String className = readTorchClassName();410int typeStorage = parseStorageType(className);411CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);412readTorchStorage(indexStorage, typeStorage);413typeTensor = storages[indexStorage].type();414readedIndexes.insert(indexStorage);415}416417//small check418size_t requireElems = (size_t)offset + (size_t)steps[0] * (size_t)sizes[0];419size_t storageElems = storages[indexStorage].total();420if (requireElems > storageElems)421CV_Error(Error::StsBadSize, "Storage has insufficient number of elements for requested Tensor");422423//convert sizes424AutoBuffer<int, 4> isizes(ndims);425AutoBuffer<size_t, 4> ssteps(ndims);426for (int i = ndims - 1; i >= 0; i--)427{428isizes[i] = (int)sizes[i];429ssteps[i] = (size_t)steps[i] * CV_ELEM_SIZE(typeTensor);430}431432//allocate Blob433Mat srcMat(ndims, isizes.data(), typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), ssteps.data());434int dstType = CV_32F;435436Mat blob;437srcMat.convertTo(blob, dstType);438439tensors.insert(std::make_pair(indexTensor, blob));440}441442static bool isNNClass(const String &className, String &nnName)443{444const char *prefixes[] = {"nn.", "cunn.", "cudnn.", "fbcunn.", NULL};445446for (int i = 0; prefixes[i]; i++)447{448if (startsWith(className, prefixes[i]))449{450nnName = className.substr(strlen(prefixes[i]));451return true;452}453}454455return false;456}457458static void convertTorchKernelsParams(const Dict &torchParams, cv::dnn::LayerParams &layerParams)459{460layerParams.set("kernel_h", torchParams.get<int>("kH"));461layerParams.set("kernel_w", torchParams.get<int>("kW"));462layerParams.set("stride_h", 
torchParams.get<int>("dH"));463layerParams.set("stride_w", torchParams.get<int>("dW"));464layerParams.set("pad_h", torchParams.get<int>("padH", 0));465layerParams.set("pad_w", torchParams.get<int>("padW", 0));466}467468void readTorchObject(int index)469{470if(readedIndexes.count(index))471return;472473String className = readTorchClassName();474String nnName;475476if (dbgPrint)477std::cout << "Class: " << className << std::endl;478479int type;480if ( (type = parseTensorType(className)) >= 0 ) //is Tensor481{482readTorchTensor(index, type);483}484else if ( (type = parseStorageType(className)) >= 0 ) //is Storage485{486readTorchStorage(index, type);487}488else if (isNNClass(className, nnName))489{490Dict scalarParams;491TensorsMap tensorParams;492493cv::Ptr<Module> newModule(new Module(nnName));494cv::dnn::LayerParams &layerParams = newModule->params;495496layerParams.set("torch_index", index);497498if (nnName == "Sequential" || nnName == "Parallel" ||499nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||500nnName == "DepthConcat" || nnName == "Inception")501{502Module *parentModule = curModule;503curModule->modules.push_back(newModule);504curModule = newModule;505readTorchTable(scalarParams, tensorParams);506curModule = parentModule;507508if (nnName == "Parallel")509{510layerParams.set("inputDimension", scalarParams.get<int>("inputDimension"));511layerParams.set("outputDimension", scalarParams.get<int>("outputDimension"));512}513else if (nnName == "Concat" || nnName == "JoinTable" || nnName == "DepthConcat")514{515layerParams.set("dimension", scalarParams.get<int>("dimension"));516}517}518else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")519{520newModule->apiType = "Convolution";521readTorchTable(scalarParams, tensorParams);522523CV_Assert(tensorParams.count("weight"));524layerParams.blobs.push_back(tensorParams["weight"].second);525526bool bias = tensorParams.count("bias") != 0;527layerParams.set("bias_term", 
bias);528if (bias)529layerParams.blobs.push_back(tensorParams["bias"].second);530531layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));532convertTorchKernelsParams(scalarParams, layerParams);533534if (nnName == "SpatialConvolutionMM")535{536// Split weights from a [ outCh x inCh*kH*kW ] 2D matrix537// onto a 4D [ outCh x inCh x kH x kW ] blob.538CV_Assert(layerParams.blobs[0].dims == 2);539const int kernel = layerParams.blobs[0].size[1]; // inCh * kH * kW540MatShape kernelShape(4);541kernelShape[0] = layerParams.blobs[0].size[0]; // outCh.542kernelShape[2] = layerParams.get<int>("kernel_h");543kernelShape[3] = layerParams.get<int>("kernel_w");544kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]); // inCh.545layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);546}547curModule->modules.push_back(newModule);548}549else if (nnName == "SpatialLPPooling")550{551// nn.Sequential {552// [input -> (1) -> (2) -> output]553// (1): nn.Sequential {554// [input -> (1) -> (2) -> (3) -> (4) -> output]555// (1): nn.Power556// (2): nn.SpatialAveragePooling(...)557// (3): nn.MulConstant558// (4): nn.Power559// }560// (2): nn.Sigmoid561// }562// nn.SpatialLPPooling is just a table so we skip it.563readTorchTable(scalarParams, tensorParams);564}565else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")566{567newModule->apiType = "Pooling";568readTorchTable(scalarParams, tensorParams);569570if (nnName == "SpatialMaxPooling") {571layerParams.set("pool", "MAX");572layerParams.set("indices_blob_id", tensorParams["indices"].first);573}574if (nnName == "SpatialAveragePooling")575{576layerParams.set("pool", "AVE");577layerParams.set("ave_pool_padded_area", scalarParams.has("count_include_pad") &&578scalarParams.get<bool>("count_include_pad"));579}580convertTorchKernelsParams(scalarParams, layerParams);581582CV_Assert(scalarParams.has("ceil_mode"));583layerParams.set("ceil_mode", 
scalarParams.get<bool>("ceil_mode"));584585curModule->modules.push_back(newModule);586}587else if (nnName == "Linear")588{589newModule->apiType = "InnerProduct";590readTorchTable(scalarParams, tensorParams);591592CV_Assert(tensorParams.count("weight"));593Mat weightBlob = tensorParams["weight"].second;594layerParams.blobs.push_back(weightBlob);595596bool bias = tensorParams.count("bias") != 0;597if (bias)598layerParams.blobs.push_back(tensorParams["bias"].second);599layerParams.set("bias_term", bias);600601layerParams.set("num_output", weightBlob.size[0]);602curModule->modules.push_back(newModule);603}604else if (nnName == "Reshape" || nnName == "View")605{606newModule->apiType = "Reshape";607608readTorchTable(scalarParams, tensorParams);609CV_Assert(scalarParams.has("size"));610611DictValue dimParam = scalarParams.get("size");612layerParams.set("dim", dimParam);613614int axis = (int)scalarParams.get<bool>("batchMode", true);615layerParams.set("axis", axis);616617curModule->modules.push_back(newModule);618}619else if (nnName == "ReLU")620{621curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));622readObject();623}624else if (nnName == "Tanh")625{626curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));627readObject();628}629else if (nnName == "Sigmoid")630{631curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));632readObject();633}634else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization" ||635nnName == "BatchNormalization")636{637newModule->apiType = "BatchNorm";638readTorchTable(scalarParams, tensorParams);639640CV_Assert(scalarParams.has("eps"));641float eps = float(scalarParams.get<double>("eps"));642layerParams.set("eps", eps);643644if (tensorParams.count("running_mean"))645{646layerParams.blobs.push_back(tensorParams["running_mean"].second);647}648else649{650CV_Assert(scalarParams.has("nOutput"));651layerParams.blobs.push_back(Mat::zeros(1, 
scalarParams.get<int>("nOutput"), CV_32F));652}653654if (tensorParams.count("running_var"))655{656layerParams.blobs.push_back(tensorParams["running_var"].second);657}658else if (tensorParams.count("running_std"))659{660layerParams.blobs.push_back(tensorParams["running_std"].second);661pow(layerParams.blobs.back(), -2, layerParams.blobs.back());662subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());663}664else665{666CV_Assert(scalarParams.has("nOutput"));667layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));668}669670if (tensorParams.count("weight"))671{672layerParams.set("has_weight", true);673layerParams.blobs.push_back(tensorParams["weight"].second);674}675676if (tensorParams.count("bias"))677{678layerParams.set("has_bias", true);679layerParams.blobs.push_back(tensorParams["bias"].second);680}681682if (nnName == "InstanceNormalization")683{684cv::Ptr<Module> mvnModule(new Module(nnName));685mvnModule->apiType = "MVN";686curModule->modules.push_back(mvnModule);687688layerParams.blobs[0].setTo(0); // batch norm's mean689layerParams.blobs[1].setTo(1); // batch norm's std690}691692curModule->modules.push_back(newModule);693}694else if (nnName == "PReLU")695{696readTorchTable(scalarParams, tensorParams);697698CV_Assert(tensorParams.count("weight"));699700size_t outputChannels = static_cast<int>(scalarParams.get<double>("nOutputPlane"));701if (outputChannels) {702703CV_Assert(tensorParams["weight"].second.total() == outputChannels);704layerParams.blobs.push_back(tensorParams["weight"].second);705706newModule->apiType = "ChannelsPReLU";707}708else {709CV_Assert(tensorParams["weight"].second.total() == 1);710float negative_slope = *tensorParams["weight"].second.ptr<float>();711layerParams.set("negative_slope", negative_slope);712713newModule->apiType = "ReLU";714}715716curModule->modules.push_back(newModule);717}718else if (nnName == "SpatialDropout" || nnName == "Dropout")719{720readTorchTable(scalarParams, 
tensorParams);721CV_Assert(scalarParams.has("p"));722723if (scalarParams.has("v2") && scalarParams.get<bool>("v2"))724{725newModule->apiType = "Identity";726}727else728{729float scale = 1 - scalarParams.get<double>("p");730731CV_Assert(scale > 0);732733newModule->apiType = "Power";734layerParams.set("scale", scale);735}736curModule->modules.push_back(newModule);737}738// TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style739// It's a loss function that has an Identity forward.740else if (nnName == "Identity" || nnName == "TotalVariation")741{742readTorchTable(scalarParams, tensorParams);743newModule->apiType = "Identity";744curModule->modules.push_back(newModule);745}746else if (nnName == "Normalize")747{748readTorchTable(scalarParams, tensorParams);749CV_Assert(scalarParams.has("p"));750751layerParams.set("p", scalarParams.get<float>("p"));752if (scalarParams.has("eps"))753layerParams.set("eps", scalarParams.get<float>("eps"));754755newModule->apiType = "Normalize";756curModule->modules.push_back(newModule);757}758else if (nnName == "Padding")759{760readTorchTable(scalarParams, tensorParams);761newModule->apiType = "Padding";762763CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));764if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)765CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");766767if (scalarParams.has("value"))768layerParams.set("value", scalarParams.get<float>("value"));769770if (scalarParams.has("nInputDim"))771layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));772773int dim = scalarParams.get<int>("dim") - 1; // In Lua we start from 1.774int pad = scalarParams.get<int>("pad");775776std::vector<int> paddings((dim + 1) * 2, 0);777if (pad > 0)778paddings[dim * 2 + 1] = pad; // Pad after (right).779else780paddings[dim * 2] = -pad; // Pad before (left).781layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], 
paddings.size()));782783curModule->modules.push_back(newModule);784}785else if (nnName == "CAddTable")786{787curModule->modules.push_back(newModule);788readObject();789}790else if (nnName == "SpatialDilatedConvolution")791{792readTorchTable(scalarParams, tensorParams);793newModule->apiType = "Convolution";794CV_Assert(scalarParams.has("padW") &&795scalarParams.has("padH")&&796scalarParams.has("dW")&&797scalarParams.has("dH")&&798scalarParams.has("dilationW")&&799scalarParams.has("dilationH")&&800scalarParams.has("kW")&&801scalarParams.has("kH")&&802scalarParams.has("nOutputPlane"));803804layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));805layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));806layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));807layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));808layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));809layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));810layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));811layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));812layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));813814layerParams.blobs.push_back(tensorParams["weight"].second);815816bool bias = tensorParams.count("bias");817layerParams.set("bias_term", bias);818if (bias)819layerParams.blobs.push_back(tensorParams["bias"].second);820821curModule->modules.push_back(newModule);822}823else if (nnName == "SpatialFullConvolution")824{825readTorchTable(scalarParams, tensorParams);826newModule->apiType = "Deconvolution";827CV_Assert(scalarParams.has("padW") 
&&828scalarParams.has("padH")&&829scalarParams.has("dW")&&830scalarParams.has("dH")&&831scalarParams.has("adjW")&&832scalarParams.has("adjH")&&833scalarParams.has("kW")&&834scalarParams.has("kH")&&835scalarParams.has("nOutputPlane"));836837layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));838layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));839layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));840layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));841layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));842layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));843layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));844layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));845layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));846847layerParams.blobs.push_back(tensorParams["weight"].second);848849bool bias = tensorParams.count("bias");850layerParams.set("bias_term", bias);851if (bias)852layerParams.blobs.push_back(tensorParams["bias"].second);853854curModule->modules.push_back(newModule);855}856else if (nnName == "SpatialMaxUnpooling")857{858readTorchTable(scalarParams, tensorParams);859CV_Assert(tensorParams.count("indices"));860861layerParams.set("indices_blob_id", tensorParams["indices"].first);862curModule->modules.push_back(newModule);863}864else if (nnName == "SoftMax")865{866newModule->apiType = "SoftMax";867curModule->modules.push_back(newModule);868}869else if (nnName == "LogSoftMax")870{871newModule->apiType = "SoftMax";872layerParams.set("log_softmax", true);873curModule->modules.push_back(newModule);874}875else if (nnName == "SpatialCrossMapLRN")876{877newModule->apiType = "LRN";878readTorchTable(scalarParams, 
tensorParams);879880CV_Assert(scalarParams.has("alpha"));881CV_Assert(scalarParams.has("beta"));882CV_Assert(scalarParams.has("k"));883CV_Assert(scalarParams.has("size"));884885layerParams.set("norm_region", "ACROSS_CHANNELS");886layerParams.set("alpha", scalarParams.get<float>("alpha"));887layerParams.set("beta", scalarParams.get<float>("beta"));888layerParams.set("bias", scalarParams.get<float>("k"));889layerParams.set("local_size", scalarParams.get<int>("size"));890layerParams.set("norm_by_size", true);891892curModule->modules.push_back(newModule);893}894else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")895{896readTorchTable(scalarParams, tensorParams);897898float power;899if (nnName == "Square") power = 2.0f;900else if (nnName == "Sqrt") power = 0.5f;901else if (nnName == "Power") power = scalarParams.get<float>("pow", 1.0f);902903newModule->apiType = "Power";904layerParams.set("power", power);905curModule->modules.push_back(newModule);906}907else if (nnName == "MulConstant")908{909readTorchTable(scalarParams, tensorParams);910CV_Assert(scalarParams.has("constant_scalar"));911newModule->apiType = "Power";912layerParams.set("scale", scalarParams.get<float>("constant_scalar"));913curModule->modules.push_back(newModule);914}915else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")916{917readTorchTable(scalarParams, tensorParams);918CV_Assert_N(scalarParams.has("pad_l"), scalarParams.has("pad_r"),919scalarParams.has("pad_t"), scalarParams.has("pad_b"));920int padTop = scalarParams.get<int>("pad_t");921int padLeft = scalarParams.get<int>("pad_l");922int padRight = scalarParams.get<int>("pad_r");923int padBottom = scalarParams.get<int>("pad_b");924if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)925CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");926927newModule->apiType = "Padding";928929// Torch's SpatialZeroPadding works with 3- or 4-dimensional input.930// So we 
add parameter input_dims=3 to ignore batch dimension if it will be.931std::vector<int> paddings(6, 0); // CHW932paddings[2] = padTop;933paddings[3] = padBottom;934paddings[4] = padLeft;935paddings[5] = padRight;936layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));937layerParams.set("input_dims", 3);938939if (nnName == "SpatialReflectionPadding")940layerParams.set("type", "reflect");941942curModule->modules.push_back(newModule);943}944else if (nnName == "ShaveImage")945{946// ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style947// It may be mapped to Slice layer.948readTorchTable(scalarParams, tensorParams);949CV_Assert(scalarParams.has("size"));950int size = scalarParams.get<int>("size");951952int begins[] = {0, 0, size, size};953int ends[] = {-1, -1, -size - 1, -size - 1};954955newModule->apiType = "Slice";956layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));957layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));958curModule->modules.push_back(newModule);959}960else if (nnName == "SpatialUpSamplingNearest")961{962readTorchTable(scalarParams, tensorParams);963CV_Assert(scalarParams.has("scale_factor"));964int scale_factor = scalarParams.get<int>("scale_factor");965newModule->apiType = "Resize";966layerParams.set("interpolation", "nearest");967layerParams.set("zoom_factor", scale_factor);968curModule->modules.push_back(newModule);969}970else971{972// Importer does not know how to map Torch's layer type to an OpenCV's one.973// However we parse all the parameters to let user create a custom layer.974readTorchTable(scalarParams, tensorParams);975for (std::map<String, DictValue>::const_iterator it = scalarParams.begin();976it != scalarParams.end(); ++it)977{978layerParams.set(it->first, it->second);979}980for (std::map<String, std::pair<int, Mat> >::iterator it = tensorParams.begin();981it != tensorParams.end(); 
++it)982{983layerParams.blobs.push_back(it->second.second);984}985newModule->apiType = nnName;986curModule->modules.push_back(newModule);987}988}989else990{991CV_Error(Error::StsNotImplemented, "Unsupported Torch class \"" + className + "\"");992}993994readedIndexes.insert(index);995}996997void readObject()998{999int typeidx = readInt();10001001if (typeidx == TYPE_TORCH)1002{1003int index = readInt();1004readTorchObject(index);1005readedIndexes.insert(index);1006}1007else if (typeidx == TYPE_NIL)1008return;1009else if (typeidx == TYPE_NUMBER)1010readDouble();1011else if (typeidx == TYPE_BOOLEAN)1012readBool();1013else if (typeidx == TYPE_STRING)1014readString();1015else if (typeidx == TYPE_TABLE)1016readTable();1017else1018CV_Error(Error::StsNotImplemented, "Unsupported Lua type");1019}10201021inline String generateLayerName(const String &label = String())1022{1023return "l" + toString(++this->moduleCounter) + "_" + label;1024}10251026int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)1027{1028if (module == NULL)1029return prevLayerId;10301031if (module->apiType.length())1032{1033int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);1034net.connect(prevLayerId, prevOutNum, newLayerId, 0);1035addedModules.push_back(std::make_pair(newLayerId, module));1036return newLayerId;1037}1038else1039{1040if (module->thName == "Sequential" || module->thName == "Inception")1041{1042for (size_t i = 0; i < module->modules.size(); i++)1043{1044prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);1045prevOutNum = 0;1046}1047return prevLayerId;1048}1049else if (module->thName == "Concat")1050{1051int newId, mergeId;1052LayerParams mergeParams;1053mergeParams.set("axis", module->params.get<int>("dimension") - 1);10541055std::vector<int> branchIds;1056for (int i = 0; i < (int)module->modules.size(); i++)1057{1058newId = fill(module->modules[i], 
addedModules, prevLayerId, prevOutNum);1059branchIds.push_back(newId);1060}10611062moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.1063mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);10641065for (int i = 0; i < branchIds.size(); i++)1066{1067net.connect(branchIds[i], 0, mergeId, i);1068}10691070addedModules.push_back(std::make_pair(mergeId, module));1071return mergeId;1072}1073else if (module->thName == "DepthConcat")1074{1075int newId, mergeId;1076LayerParams mergeParams;1077mergeParams.set("axis", module->params.get<int>("dimension") - 1);1078mergeParams.set("padding", true);10791080std::vector<int> branchIds;1081for (int i = 0; i < (int)module->modules.size(); i++)1082{1083newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);1084branchIds.push_back(newId);1085}10861087mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);10881089for (int i = 0; i < branchIds.size(); i++)1090{1091net.connect(branchIds[i], 0, mergeId, i);1092}10931094addedModules.push_back(std::make_pair(mergeId, module));1095return mergeId;1096}1097else if (module->thName == "Parallel")1098{1099int newId, splitId, mergeId, reshapeId;11001101LayerParams splitParams, mergeParams, reshapeParams;1102splitParams.set("axis", module->params.get<int>("inputDimension") - 1);1103mergeParams.set("axis", module->params.get<int>("outputDimension") - 1);1104reshapeParams.set("axis", splitParams.get<int>("axis"));1105reshapeParams.set("num_axes", 1);11061107splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);1108reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);1109net.connect(prevLayerId, prevOutNum, splitId, 0);11101111std::vector<int> branchIds;1112for (int i = 0; i < (int)module->modules.size(); i++)1113{1114net.connect(splitId, i, reshapeId, i);1115newId = fill(module->modules[i], addedModules, reshapeId, 
i);1116branchIds.push_back(newId);1117}11181119mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);11201121for (int i = 0; i < branchIds.size(); i++)1122{1123net.connect(branchIds[i], 0, mergeId, i);1124}11251126addedModules.push_back(std::make_pair(mergeId, module));1127return mergeId;1128}1129else if (module->thName == "ConcatTable") {1130int newId = -1;1131moduleCounter += 1; // Skip split layer creation. See https://github.com/opencv/opencv/pull/9384.1132for (int i = 0; i < (int)module->modules.size(); i++)1133{1134newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);1135}1136numUnconnectedLayers.push_back(module->modules.size());1137return newId;1138}1139else if (module->thName == "JoinTable") {1140std::vector<int> ids = net.getUnconnectedOutLayers();11411142int mergeId;1143LayerParams mergeParams;1144mergeParams.set("axis", module->params.get<int>("dimension") - 1);11451146mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);1147addedModules.push_back(std::make_pair(mergeId, module));11481149// Connect to the last number of unconnected layers.1150CV_Assert(!numUnconnectedLayers.empty());1151const int numInputs = numUnconnectedLayers.back();1152numUnconnectedLayers.pop_back();1153CV_Assert(numInputs <= ids.size());1154for (int i = 0; i < numInputs; i++)1155{1156net.connect(ids[ids.size() - numInputs + i], 0, mergeId, i);1157}11581159return mergeId;1160}1161else if (module->thName == "CAddTable") {1162String name = generateLayerName("torchCAddTable");1163std::vector<int> ids = net.getUnconnectedOutLayers();1164LayerParams params;1165params.set("operation", "sum");116611671168int id = net.addLayer(name, "Eltwise", params);11691170// Connect to the last number of unconnected layers.1171CV_Assert(!numUnconnectedLayers.empty());1172const int numInputs = numUnconnectedLayers.back();1173numUnconnectedLayers.pop_back();1174CV_Assert(numInputs <= ids.size());1175for (int i = 0; i < numInputs; 
i++)1176{1177net.connect(ids[ids.size() - numInputs + i], 0, id, i);1178}11791180addedModules.push_back(std::make_pair(id, module));1181return id;1182}1183else if (module->thName == "SpatialMaxUnpooling") {1184CV_Assert(module->params.has("indices_blob_id"));1185int indicesBlobId = module->params.get<int>("indices_blob_id");1186std::pair<int, Module*> poolingLayer;1187poolingLayer.first = -1;11881189for(int i = 0; i < addedModules.size(); i++)1190{1191if (addedModules[i].second->apiType == "Pooling" &&1192addedModules[i].second->params.has("indices_blob_id") &&1193addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)1194{1195poolingLayer = addedModules[i];1196break;1197}1198}11991200module->params.set("pool_k_h", poolingLayer.second->params.get<int>("kernel_h"));1201module->params.set("pool_k_w", poolingLayer.second->params.get<int>("kernel_w"));1202module->params.set("pool_stride_h", poolingLayer.second->params.get<int>("stride_h"));1203module->params.set("pool_stride_w", poolingLayer.second->params.get<int>("stride_w"));1204module->params.set("pool_pad_h", poolingLayer.second->params.get<int>("pad_h"));1205module->params.set("pool_pad_w", poolingLayer.second->params.get<int>("pad_w"));12061207String name = generateLayerName("torchMaxUnpooling");1208int id = net.addLayer(name, "MaxUnpool", module->params);1209net.connect(prevLayerId, 0, id, 0);12101211CV_Assert(poolingLayer.first != -1);1212net.connect(poolingLayer.first, 1, id, 1);12131214return id;1215}1216}12171218CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);1219return -1;1220}12211222void populateNet(Net net_)1223{1224CV_TRACE_FUNCTION();12251226CV_Assert(rootModule == NULL);1227cv::Ptr<Module> rootModule_ = cv::makePtr<Module>("Sequential");1228rootModule = rootModule_.get();1229curModule = rootModule;12301231THFile_seek(file, 0);1232readObject();12331234net = net_;1235std::vector<std::pair<int, Module*> > addedModules;1236fill(rootModule, 
addedModules);12371238rootModule = NULL;1239curModule = NULL;1240}1241};12421243Mat readTorchBlob(const String &filename, bool isBinary)1244{1245TorchImporter importer(filename, isBinary);1246importer.readObject();1247CV_Assert(importer.tensors.size() == 1);12481249return importer.tensors.begin()->second;1250}12511252Net readNetFromTorch(const String &model, bool isBinary)1253{1254CV_TRACE_FUNCTION();12551256TorchImporter importer(model, isBinary);1257Net net;1258importer.populateNet(net);1259return net;1260}12611262CV__DNN_INLINE_NS_END1263}} // namespace126412651266