// Path: blob/master/modules/dnn/test/test_torch_importer.cpp
// 16339 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2013, OpenCV Foundation, all rights reserved.13// Third party copyrights are property of their respective owners.14//15// Redistribution and use in source and binary forms, with or without modification,16// are permitted provided that the following conditions are met:17//18// * Redistribution's of source code must retain the above copyright notice,19// this list of conditions and the following disclaimer.20//21// * Redistribution's in binary form must reproduce the above copyright notice,22// this list of conditions and the following disclaimer in the documentation23// and/or other materials provided with the distribution.24//25// * The name of the copyright holders may not be used to endorse or promote products26// derived from this software without specific prior written permission.27//28// This software is provided by the copyright holders and contributors "as is" and29// any express or implied warranties, including, but not limited to, the implied30// warranties of merchantability and fitness for a particular purpose are disclaimed.31// In no event shall the Intel Corporation or contributors be liable for any direct,32// indirect, incidental, special, exemplary, or consequential damages33// (including, but not limited to, procurement of substitute goods or services;34// loss of use, data, or profits; or business interruption) however caused35// and on any theory of liability, whether in contract, strict liability,36// or tort (including negligence or otherwise) arising in any way out of37// the use of this software, even if 
advised of the possibility of such damage.38//39//M*/4041#include "test_precomp.hpp"42#include "npy_blob.hpp"43#include <opencv2/dnn/shape_utils.hpp>44#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS4546namespace opencv_test47{4849using namespace std;50using namespace testing;51using namespace cv;52using namespace cv::dnn;5354template<typename TStr>55static std::string _tf(TStr filename, bool inTorchDir = true)56{57String path = "dnn/";58if (inTorchDir)59path += "torch/";60path += filename;61return findDataFile(path, false);62}6364TEST(Torch_Importer, simple_read)65{66Net net;67ASSERT_NO_THROW(net = readNetFromTorch(_tf("net_simple_net.txt"), false));68ASSERT_FALSE(net.empty());69}7071class Test_Torch_layers : public DNNTestLayer72{73public:74void runTorchNet(const String& prefix, String outLayerName = "",75bool check2ndBlob = false, bool isBinary = false,76double l1 = 0.0, double lInf = 0.0)77{78String suffix = (isBinary) ? ".dat" : ".txt";7980Mat inp, outRef;81ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );82ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );8384checkBackend(backend, target, &inp, &outRef);8586Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);87ASSERT_FALSE(net.empty());8889net.setPreferableBackend(backend);90net.setPreferableTarget(target);9192if (outLayerName.empty())93outLayerName = net.getLayerNames().back();9495net.setInput(inp);96std::vector<Mat> outBlobs;97net.forward(outBlobs, outLayerName);98l1 = l1 ? l1 : default_l1;99lInf = lInf ? 
lInf : default_lInf;100normAssert(outRef, outBlobs[0], "", l1, lInf);101102if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)103{104Mat out2 = outBlobs[1];105Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);106normAssert(out2, ref2, "", l1, lInf);107}108}109};110111TEST_P(Test_Torch_layers, run_convolution)112{113// Output reference values are in range [23.4018, 72.0181]114double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.08 : default_l1;115double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.42 : default_lInf;116runTorchNet("net_conv", "", false, true, l1, lInf);117}118119TEST_P(Test_Torch_layers, run_pool_max)120{121if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)122throw SkipTestException("");123runTorchNet("net_pool_max", "", true);124}125126TEST_P(Test_Torch_layers, run_pool_ave)127{128runTorchNet("net_pool_ave");129}130131TEST_P(Test_Torch_layers, run_reshape_change_batch_size)132{133runTorchNet("net_reshape");134}135136TEST_P(Test_Torch_layers, run_reshape)137{138runTorchNet("net_reshape_batch");139runTorchNet("net_reshape_channels", "", false, true);140}141142TEST_P(Test_Torch_layers, run_reshape_single_sample)143{144// Reference output values in range [14.4586, 18.4492].145runTorchNet("net_reshape_single_sample", "", false, false,146(target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.0073 : default_l1,147(target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.025 : default_lInf);148}149150TEST_P(Test_Torch_layers, run_linear)151{152if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)153throw SkipTestException("");154runTorchNet("net_linear_2d");155}156157TEST_P(Test_Torch_layers, run_concat)158{159runTorchNet("net_concat", "l5_torchMerge");160}161162TEST_P(Test_Torch_layers, run_depth_concat)163{164runTorchNet("net_depth_concat", "", false, true, 0.0,165target == DNN_TARGET_OPENCL_FP16 ? 
0.021 : 0.0);166}167168TEST_P(Test_Torch_layers, run_deconv)169{170runTorchNet("net_deconv");171}172173TEST_P(Test_Torch_layers, run_batch_norm)174{175runTorchNet("net_batch_norm", "", false, true);176}177178TEST_P(Test_Torch_layers, net_prelu)179{180runTorchNet("net_prelu");181}182183TEST_P(Test_Torch_layers, net_cadd_table)184{185runTorchNet("net_cadd_table");186}187188TEST_P(Test_Torch_layers, net_softmax)189{190runTorchNet("net_softmax");191runTorchNet("net_softmax_spatial");192}193194TEST_P(Test_Torch_layers, net_logsoftmax)195{196runTorchNet("net_logsoftmax");197runTorchNet("net_logsoftmax_spatial");198}199200TEST_P(Test_Torch_layers, net_lp_pooling)201{202runTorchNet("net_lp_pooling_square", "", false, true);203runTorchNet("net_lp_pooling_power", "", false, true);204}205206TEST_P(Test_Torch_layers, net_conv_gemm_lrn)207{208if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)209throw SkipTestException("");210runTorchNet("net_conv_gemm_lrn", "", false, true,211target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,212target == DNN_TARGET_OPENCL_FP16 ? 
0.023 : 0.0);213}214215TEST_P(Test_Torch_layers, net_inception_block)216{217#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018030000218if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)219throw SkipTestException("");220#endif221runTorchNet("net_inception_block", "", false, true);222}223224TEST_P(Test_Torch_layers, net_normalize)225{226runTorchNet("net_normalize", "", false, true);227}228229TEST_P(Test_Torch_layers, net_padding)230{231runTorchNet("net_padding", "", false, true);232runTorchNet("net_spatial_zero_padding", "", false, true);233runTorchNet("net_spatial_reflection_padding", "", false, true);234}235236TEST_P(Test_Torch_layers, net_non_spatial)237{238if (backend == DNN_BACKEND_INFERENCE_ENGINE &&239(target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))240throw SkipTestException("");241runTorchNet("net_non_spatial", "", false, true);242}243244TEST_P(Test_Torch_layers, run_paralel)245{246if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)247throw SkipTestException("");248runTorchNet("net_parallel", "l5_torchMerge");249}250251TEST_P(Test_Torch_layers, net_residual)252{253runTorchNet("net_residual", "", false, true);254}255256class Test_Torch_nets : public DNNTestLayer {};257258TEST_P(Test_Torch_nets, OpenFace_accuracy)259{260#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000261if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)262throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");263#endif264checkBackend();265if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)266throw SkipTestException("");267268const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);269Net net = readNetFromTorch(model);270271net.setPreferableBackend(backend);272net.setPreferableTarget(target);273274Mat sample = imread(findDataFile("cv/shared/lena.png", false));275Mat sampleF32(sample.size(), 
CV_32FC3);276sample.convertTo(sampleF32, sampleF32.type());277sampleF32 /= 255;278resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);279280Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);281282net.setInput(inputBlob);283Mat out = net.forward();284285Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);286normAssert(out, outRef, "", default_l1, default_lInf);287}288289static Mat getSegmMask(const Mat& scores)290{291const int rows = scores.size[2];292const int cols = scores.size[3];293const int numClasses = scores.size[1];294295Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);296Mat maxVal(rows, cols, CV_32FC1, Scalar(0));297for (int ch = 0; ch < numClasses; ch++)298{299for (int row = 0; row < rows; row++)300{301const float *ptrScore = scores.ptr<float>(0, ch, row);302uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);303float *ptrMaxVal = maxVal.ptr<float>(row);304for (int col = 0; col < cols; col++)305{306if (ptrScore[col] > ptrMaxVal[col])307{308ptrMaxVal[col] = ptrScore[col];309ptrMaxCl[col] = (uchar)ch;310}311}312}313}314return maxCl;315}316317// Computer per-class intersection over union metric.318static void normAssertSegmentation(const Mat& ref, const Mat& test)319{320CV_Assert_N(ref.dims == 4, test.dims == 4);321const int numClasses = ref.size[1];322CV_Assert(numClasses == test.size[1]);323324Mat refMask = getSegmMask(ref);325Mat testMask = getSegmMask(test);326EXPECT_EQ(countNonZero(refMask != testMask), 0);327}328329TEST_P(Test_Torch_nets, ENet_accuracy)330{331checkBackend();332if (backend == DNN_BACKEND_INFERENCE_ENGINE ||333(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))334throw SkipTestException("");335336Net net;337{338const string model = findDataFile("dnn/Enet-model-best.net", false);339net = readNetFromTorch(model, true);340ASSERT_TRUE(!net.empty());341}342343net.setPreferableBackend(backend);344net.setPreferableTarget(target);345346Mat sample = imread(_tf("street.png", false));347Mat 
inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);348349net.setInput(inputBlob, "");350Mat out = net.forward();351Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));352// Due to numerical instability in Pooling-Unpooling layers (indexes jittering)353// thresholds for ENet must be changed. Accuracy of results was checked on354// Cityscapes dataset and difference in mIOU with Torch is 10E-4%355normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);356normAssertSegmentation(ref, out);357358const int N = 3;359for (int i = 0; i < N; i++)360{361net.setInput(inputBlob, "");362Mat out = net.forward();363normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.552);364normAssertSegmentation(ref, out);365}366}367368// Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style369// th fast_neural_style.lua \370// -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \371// -output_image lena.png \372// -median_filter 0 \373// -image_size 0 \374// -model models/eccv16/starry_night.t7375// th fast_neural_style.lua \376// -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \377// -output_image lena.png \378// -median_filter 0 \379// -image_size 0 \380// -model models/instance_norm/feathers.t7381TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)382{383checkBackend();384std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",385"dnn/fast_neural_style_instance_norm_feathers.t7"};386std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};387388for (int i = 0; i < 2; ++i)389{390const string model = findDataFile(models[i], false);391Net net = readNetFromTorch(model);392393net.setPreferableBackend(backend);394net.setPreferableTarget(target);395396Mat img = imread(findDataFile("dnn/googlenet_1.png", false));397Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);398399net.setInput(inputBlob);400Mat 
out = net.forward();401402// Deprocessing.403getPlane(out, 0, 0) += 103.939;404getPlane(out, 0, 1) += 116.779;405getPlane(out, 0, 2) += 123.68;406out = cv::min(cv::max(0, out), 255);407408Mat ref = imread(findDataFile(targets[i]));409Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);410411if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)412{413double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();414if (target == DNN_TARGET_MYRIAD)415EXPECT_LE(normL1, 4.0f);416else417EXPECT_LE(normL1, 0.6f);418}419else420normAssert(out, refBlob, "", 0.5, 1.1);421}422}423424INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());425426// Test a custom layer427// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest428class SpatialUpSamplingNearestLayer CV_FINAL : public Layer429{430public:431SpatialUpSamplingNearestLayer(const LayerParams ¶ms) : Layer(params)432{433scale = params.get<int>("scale_factor");434}435436static Ptr<Layer> create(LayerParams& params)437{438return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));439}440441virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,442const int requiredOutputs,443std::vector<std::vector<int> > &outputs,444std::vector<std::vector<int> > &internals) const CV_OVERRIDE445{446std::vector<int> outShape(4);447outShape[0] = inputs[0][0]; // batch size448outShape[1] = inputs[0][1]; // number of channels449outShape[2] = scale * inputs[0][2];450outShape[3] = scale * inputs[0][3];451outputs.assign(1, outShape);452return false;453}454455void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE456{457CV_TRACE_FUNCTION();458CV_TRACE_ARG_VALUE(name, "name", name.c_str());459460std::vector<Mat> inputs, outputs;461inputs_arr.getMatVector(inputs);462outputs_arr.getMatVector(outputs);463464Mat& inp = inputs[0];465Mat& out = outputs[0];466const int outHeight = 
out.size[2];467const int outWidth = out.size[3];468for (size_t n = 0; n < inp.size[0]; ++n)469{470for (size_t ch = 0; ch < inp.size[1]; ++ch)471{472resize(getPlane(inp, n, ch), getPlane(out, n, ch),473Size(outWidth, outHeight), 0, 0, INTER_NEAREST);474}475}476}477478private:479int scale;480};481482TEST_P(Test_Torch_layers, upsampling_nearest)483{484// Test a custom layer.485CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);486try487{488runTorchNet("net_spatial_upsampling_nearest", "", false, true);489}490catch (...)491{492LayerFactory::unregisterLayer("SpatialUpSamplingNearest");493throw;494}495LayerFactory::unregisterLayer("SpatialUpSamplingNearest");496497// Test an implemented layer.498runTorchNet("net_spatial_upsampling_nearest", "", false, true);499}500501INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());502503}504505506