// Path: blob/master/modules/dnn/src/layers/pooling_layer.cpp
// 16337 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2013, OpenCV Foundation, all rights reserved.13// Copyright (C) 2017, Intel Corporation, all rights reserved.14// Third party copyrights are property of their respective owners.15//16// Redistribution and use in source and binary forms, with or without modification,17// are permitted provided that the following conditions are met:18//19// * Redistribution's of source code must retain the above copyright notice,20// this list of conditions and the following disclaimer.21//22// * Redistribution's in binary form must reproduce the above copyright notice,23// this list of conditions and the following disclaimer in the documentation24// and/or other materials provided with the distribution.25//26// * The name of the copyright holders may not be used to endorse or promote products27// derived from this software without specific prior written permission.28//29// This software is provided by the copyright holders and contributors "as is" and30// any express or implied warranties, including, but not limited to, the implied31// warranties of merchantability and fitness for a particular purpose are disclaimed.32// In no event shall the Intel Corporation or contributors be liable for any direct,33// indirect, incidental, special, exemplary, or consequential damages34// (including, but not limited to, procurement of substitute goods or services;35// loss of use, data, or profits; or business interruption) however caused36// and on any theory of liability, whether in contract, strict liability,37// or tort (including negligence or otherwise) 
arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142#include "../precomp.hpp"43#include "layers_common.hpp"44#include "opencv2/core/hal/intrin.hpp"45#include "../op_halide.hpp"46#include "../op_inf_engine.hpp"47#include "../op_vkcom.hpp"48#include <float.h>49#include <algorithm>50using std::max;51using std::min;5253#ifdef HAVE_OPENCL54#include "opencl_kernels_dnn.hpp"55using namespace cv::dnn::ocl4dnn;56#endif5758namespace cv59{60namespace dnn61{62static inline int roundRoiSize(float v)63{64return (int)(v + (v >= 0.f ? 0.5f : -0.5f));65}6667class PoolingLayerImpl CV_FINAL : public PoolingLayer68{69public:70PoolingLayerImpl(const LayerParams& params)71{72computeMaxIdx = true;73globalPooling = false;74stride = Size(1, 1);7576if (params.has("pool") || params.has("kernel_size") ||77params.has("kernel_w") || params.has("kernel_h"))78{79String pool = toLowerCase(params.get<String>("pool", "max"));80if (pool == "max")81type = MAX;82else if (pool == "ave")83type = AVE;84else if (pool == "stochastic")85type = STOCHASTIC;86else87CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");8889getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,90pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode);9192pad.width = pad_l;93pad.height = pad_t;94}95else if (params.has("pooled_w") || params.has("pooled_h"))96{97type = ROI;98computeMaxIdx = false;99pooledSize.width = params.get<uint32_t>("pooled_w", 1);100pooledSize.height = params.get<uint32_t>("pooled_h", 1);101}102else if (params.has("output_dim") && params.has("group_size"))103{104type = PSROI;105pooledSize.width = params.get<int>("group_size");106pooledSize.height = pooledSize.width;107psRoiOutChannels = params.get<int>("output_dim");108}109else110CV_Error(Error::StsBadArg, "Cannot determine pooling type");111setParamsFrom(params);112ceilMode = params.get<bool>("ceil_mode", true);113spatialScale = 
params.get<float>("spatial_scale", 1);114avePoolPaddedArea = params.get<bool>("ave_pool_padded_area", true);115}116117#ifdef HAVE_OPENCL118Ptr<OCL4DNNPool<float> > poolOp;119#endif120121void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE122{123std::vector<Mat> inputs, outputs;124inputs_arr.getMatVector(inputs);125outputs_arr.getMatVector(outputs);126127CV_Assert(!inputs.empty());128129cv::Size inp(inputs[0].size[3], inputs[0].size[2]),130out(outputs[0].size[3], outputs[0].size[2]);131132if(globalPooling)133{134kernel = inp;135}136137getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r);138pad.width = pad_l;139pad.height = pad_t;140141#ifdef HAVE_OPENCL142poolOp.release();143#endif144}145146virtual bool supportBackend(int backendId) CV_OVERRIDE147{148if (backendId == DNN_BACKEND_INFERENCE_ENGINE)149{150if (preferableTarget == DNN_TARGET_MYRIAD)151return type == MAX || type == AVE;152else153return type != STOCHASTIC;154}155else156return backendId == DNN_BACKEND_OPENCV ||157backendId == DNN_BACKEND_HALIDE && haveHalide() &&158(type == MAX || type == AVE && !pad_t && !pad_l && !pad_b && !pad_r) ||159backendId == DNN_BACKEND_VKCOM && haveVulkan() &&160(type == MAX || type == AVE);161}162163#ifdef HAVE_OPENCL164bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)165{166std::vector<UMat> inputs;167std::vector<UMat> outputs;168169bool use_half = (inps.depth() == CV_16S);170inps.getUMatVector(inputs);171outs.getUMatVector(outputs);172173if (poolOp.empty())174{175OCL4DNNPoolConfig config;176177config.in_shape = shape(inputs[0]);178config.out_shape = shape(outputs[0]);179config.kernel = kernel;180config.pad_l = pad_l;181config.pad_t = pad_t;182config.pad_r = pad_r;183config.pad_b = pad_b;184config.stride = stride;185config.channels = inputs[0].size[1];186config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :187(type == AVE ? 
LIBDNN_POOLING_METHOD_AVE :188LIBDNN_POOLING_METHOD_STO);189config.avePoolPaddedArea = avePoolPaddedArea;190config.computeMaxIdx = computeMaxIdx;191config.use_half = use_half;192poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));193}194195for (size_t ii = 0; ii < inputs.size(); ii++)196{197UMat& inpMat = inputs[ii];198int out_index = (type == MAX) ? 2 : 1;199UMat& outMat = outputs[out_index * ii];200UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();201202CV_Assert(inpMat.offset == 0 && outMat.offset == 0);203204if (!poolOp->Forward(inpMat, outMat, maskMat))205return false;206}207return true;208}209#endif210211void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE212{213CV_TRACE_FUNCTION();214CV_TRACE_ARG_VALUE(name, "name", name.c_str());215216if (type == MAX || type == AVE || type == STOCHASTIC)217{218CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),219forward_ocl(inputs_arr, outputs_arr, internals_arr))220}221if (inputs_arr.depth() == CV_16S)222{223forward_fallback(inputs_arr, outputs_arr, internals_arr);224return;225}226227std::vector<Mat> inputs, outputs;228inputs_arr.getMatVector(inputs);229outputs_arr.getMatVector(outputs);230231switch (type)232{233case MAX:234CV_Assert_N(inputs.size() == 1, outputs.size() == 2);235maxPooling(inputs[0], outputs[0], outputs[1]);236break;237case AVE:238CV_Assert_N(inputs.size() == 1, outputs.size() == 1);239avePooling(inputs[0], outputs[0]);240break;241case ROI: case PSROI:242CV_Assert_N(inputs.size() == 2, outputs.size() == 1);243roiPooling(inputs[0], inputs[1], outputs[0]);244break;245default:246CV_Error(Error::StsNotImplemented, "Not implemented");247break;248}249}250251virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE252{253#ifdef HAVE_VULKAN254int padding_mode;255vkcom::PoolType pool_type;256int filter_size[2] = {kernel.height, kernel.width};257int pad_size[2] = {pad.height, 
pad.width};258int stride_size[2] = {stride.height, stride.width};259pool_type = type == MAX ? vkcom::kPoolTypeMax:260(type == AVE ? vkcom::kPoolTypeAvg:261vkcom::kPoolTypeNum);262263if (padMode.empty())264{265padding_mode = vkcom::kPaddingModeCaffe;266}267else if (padMode == "VALID")268{269padding_mode = vkcom::kPaddingModeValid;270}271else if (padMode == "SAME")272{273padding_mode = vkcom::kPaddingModeSame;274}275else276CV_Error(Error::StsError, "Unsupported padding mode " + padMode);277278std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPool(filter_size, pad_size,279stride_size, padding_mode,280pool_type, avePoolPaddedArea));281return Ptr<BackendNode>(new VkComBackendNode(inputs, op));282#endif283return Ptr<BackendNode>();284}285286virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE287{288if (type == MAX)289return initMaxPoolingHalide(inputs);290else if (type == AVE)291return initAvePoolingHalide(inputs);292else293return Ptr<BackendNode>();294}295296virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE297{298#ifdef HAVE_INF_ENGINE299InferenceEngine::LayerParams lp;300lp.name = name;301lp.precision = InferenceEngine::Precision::FP32;302303std::shared_ptr<InferenceEngine::CNNLayer> ieLayer;304if (type == MAX || type == AVE)305{306lp.type = "Pooling";307InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp);308#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R3)309poolLayer->_kernel.insert(InferenceEngine::X_AXIS, kernel.width);310poolLayer->_kernel.insert(InferenceEngine::Y_AXIS, kernel.height);311poolLayer->_stride.insert(InferenceEngine::X_AXIS, stride.width);312poolLayer->_stride.insert(InferenceEngine::Y_AXIS, stride.height);313poolLayer->_padding.insert(InferenceEngine::X_AXIS, pad_l);314poolLayer->_padding.insert(InferenceEngine::Y_AXIS, pad_t);315poolLayer->_pads_end.insert(InferenceEngine::X_AXIS, 
pad_r);316poolLayer->_pads_end.insert(InferenceEngine::Y_AXIS, pad_b);317#else318poolLayer->_kernel_x = kernel.width;319poolLayer->_kernel_y = kernel.height;320poolLayer->_stride_x = stride.width;321poolLayer->_stride_y = stride.height;322poolLayer->_padding_x = pad_l;323poolLayer->_padding_y = pad_t;324poolLayer->params["pad-r"] = format("%d", pad_r);325poolLayer->params["pad-b"] = format("%d", pad_b);326#endif327poolLayer->_exclude_pad = type == AVE && padMode == "SAME";328poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";329poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :330InferenceEngine::PoolingLayer::PoolType::AVG;331ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(poolLayer);332}333else if (type == ROI)334{335lp.type = "ROIPooling";336ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));337ieLayer->params["pooled_w"] = format("%d", pooledSize.width);338ieLayer->params["pooled_h"] = format("%d", pooledSize.height);339ieLayer->params["spatial_scale"] = format("%f", spatialScale);340}341else if (type == PSROI)342{343lp.type = "PSROIPooling";344ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));345ieLayer->params["output_dim"] = format("%d", psRoiOutChannels);346ieLayer->params["group_size"] = format("%d", pooledSize.width);347ieLayer->params["spatial_scale"] = format("%f", spatialScale);348}349else350CV_Error(Error::StsNotImplemented, "Unsupported pooling type");351352return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));353#endif // HAVE_INF_ENGINE354return Ptr<BackendNode>();355}356357358class PoolingInvoker : public ParallelLoopBody359{360public:361const Mat* src, *rois;362Mat *dst, *mask;363Size kernel, stride;364int pad_l, pad_t, pad_r, pad_b;365bool avePoolPaddedArea;366int nstripes;367bool computeMaxIdx;368std::vector<int> ofsbuf;369int poolingType;370float spatialScale;371372PoolingInvoker() : src(0), rois(0), dst(0), mask(0), 
pad_l(0), pad_t(0), pad_r(0), pad_b(0),373avePoolPaddedArea(false), nstripes(0),374computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}375376static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,377Size stride, int pad_l, int pad_t, int pad_r, int pad_b, bool avePoolPaddedArea, int poolingType, float spatialScale,378bool computeMaxIdx, int nstripes)379{380CV_Assert_N(381src.isContinuous(), dst.isContinuous(),382src.type() == CV_32F, src.type() == dst.type(),383src.dims == 4, dst.dims == 4,384((poolingType == ROI || poolingType == PSROI) && dst.size[0] ==rois.size[0] || src.size[0] == dst.size[0]),385poolingType == PSROI || src.size[1] == dst.size[1],386(mask.empty() || (mask.type() == src.type() && mask.size == dst.size)));387388PoolingInvoker p;389390p.src = &src;391p.rois = &rois;392p.dst = &dst;393p.mask = &mask;394p.kernel = kernel;395p.stride = stride;396p.pad_l = pad_l;397p.pad_t = pad_t;398p.pad_r = pad_r;399p.pad_b = pad_b;400p.avePoolPaddedArea = avePoolPaddedArea;401p.nstripes = nstripes;402p.computeMaxIdx = computeMaxIdx;403p.poolingType = poolingType;404p.spatialScale = spatialScale;405406if( !computeMaxIdx )407{408p.ofsbuf.resize(kernel.width*kernel.height);409for( int i = 0; i < kernel.height; i++ )410for( int j = 0; j < kernel.width; j++ )411p.ofsbuf[i*kernel.width + j] = src.size[3]*i + j;412}413414parallel_for_(Range(0, nstripes), p, nstripes);415}416417void operator()(const Range& r) const CV_OVERRIDE418{419int channels = dst->size[1], width = dst->size[3], height = dst->size[2];420int inp_width = src->size[3], inp_height = src->size[2];421size_t total = dst->total();422size_t stripeSize = (total + nstripes - 1)/nstripes;423size_t stripeStart = r.start*stripeSize;424size_t stripeEnd = std::min(r.end*stripeSize, total);425int kernel_w = kernel.width, kernel_h = kernel.height;426int stride_w = stride.width, stride_h = stride.height;427bool compMaxIdx = computeMaxIdx;428429#if CV_SIMD128430const int* ofsptr = 
ofsbuf.empty() ? 0 : (const int*)&ofsbuf[0];431if (poolingType == MAX && !compMaxIdx && !ofsptr)432CV_Error(Error::StsBadArg, "ofsbuf should be initialized in this mode");433v_float32x4 idx00(0.f, (float)stride_w, (float)(stride_w*2), (float)(stride_w*3));434v_float32x4 ones = v_setall_f32(1.f);435v_float32x4 idx_delta = v_setall_f32((float)(inp_width - kernel_w));436#endif437438for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )439{440size_t ofs = ofs0;441int x0 = (int)(ofs % width);442ofs /= width;443int y0 = (int)(ofs % height);444ofs /= height;445int c = (int)(ofs % channels);446int n = (int)(ofs / channels);447int ystart, yend;448449const float *srcData = 0;450if (poolingType == ROI)451{452const float *roisData = rois->ptr<float>(n);453int ystartROI = roundRoiSize(roisData[2] * spatialScale);454int yendROI = roundRoiSize(roisData[4] * spatialScale);455int roiHeight = std::max(yendROI - ystartROI + 1, 1);456float roiRatio = (float)roiHeight / height;457458ystart = ystartROI + y0 * roiRatio;459yend = ystartROI + std::ceil((y0 + 1) * roiRatio);460461CV_Assert(roisData[0] < src->size[0]);462srcData = src->ptr<float>(roisData[0], c);463}464else if (poolingType == PSROI)465{466const float *roisData = rois->ptr<float>(n);467float ystartROI = roundRoiSize(roisData[2]) * spatialScale;468float yendROI = roundRoiSize(roisData[4] + 1) * spatialScale;469float roiHeight = std::max(yendROI - ystartROI, 0.1f);470float roiRatio = roiHeight / height;471472ystart = (int)std::floor(ystartROI + y0 * roiRatio);473yend = (int)std::ceil(ystartROI + (y0 + 1) * roiRatio);474}475else476{477ystart = y0 * stride_h - pad_t;478yend = min(ystart + kernel_h, inp_height + pad_b);479srcData = src->ptr<float>(n, c);480}481int ydelta = yend - ystart;482ystart = max(ystart, 0);483yend = min(yend, inp_height);484float *dstData = dst->ptr<float>(n, c, y0);485float *dstMaskData = mask->data ? 
mask->ptr<float>(n, c, y0) : 0;486487int delta = std::min((int)(stripeEnd - ofs0), width - x0);488ofs0 += delta;489int x1 = x0 + delta;490491if( poolingType == MAX)492for( ; x0 < x1; x0++ )493{494int xstart = x0 * stride_w - pad_l;495int xend = min(xstart + kernel_w, inp_width);496xstart = max(xstart, 0);497if (xstart >= xend || ystart >= yend)498{499dstData[x0] = 0;500if (compMaxIdx && dstMaskData)501dstMaskData[x0] = -1;502continue;503}504#if CV_SIMD128505if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )506{507if( compMaxIdx )508{509v_float32x4 max_val0 = v_setall_f32(-FLT_MAX);510v_float32x4 max_val1 = max_val0;511v_float32x4 max_idx0 = v_setall_f32(-1.f);512v_float32x4 max_idx1 = max_idx0;513int index0 = ystart * inp_width + xstart;514v_float32x4 idx0 = idx00 + v_setall_f32((float)index0);515v_float32x4 idx1 = idx0 + v_setall_f32((float)(stride_w*4));516517for (int y = ystart; y < yend; ++y)518{519for (int x = xstart; x < xend; ++x, idx0 += ones, idx1 += ones)520{521const int index = y * inp_width + x;522v_float32x4 v0(srcData[index], srcData[index + stride_w],523srcData[index + stride_w*2], srcData[index + stride_w*3]);524v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],525srcData[index + stride_w*6], srcData[index + stride_w*7]);526max_idx0 = v_select(v0 > max_val0, idx0, max_idx0);527max_idx1 = v_select(v1 > max_val1, idx1, max_idx1);528max_val0 = v_max(max_val0, v0);529max_val1 = v_max(max_val1, v1);530}531idx0 += idx_delta;532idx1 += idx_delta;533}534v_store(dstData + x0, max_val0);535v_store(dstData + x0 + 4, max_val1);536if (dstMaskData)537{538v_store(dstMaskData + x0, max_idx0);539v_store(dstMaskData + x0 + 4, max_idx1);540}541x0 += 7;542}543else544{545v_float32x4 max_val0 = v_setall_f32(-FLT_MAX);546v_float32x4 max_val1 = max_val0;547548if( yend - ystart == kernel_h )549{550const float* srcData1 = srcData + ystart*inp_width + xstart;551if( stride_w == 1 )552for (int k = 0; k < 
kernel_w*kernel_h; k++)553{554int index = ofsptr[k];555v_float32x4 v0 = v_load(srcData1 + index);556v_float32x4 v1 = v_load(srcData1 + index + 4);557max_val0 = v_max(max_val0, v0);558max_val1 = v_max(max_val1, v1);559}560else if( stride_w == 2 )561for (int k = 0; k < kernel_w*kernel_h; k++)562{563int index = ofsptr[k];564v_float32x4 v0, v1, dummy;565v_load_deinterleave(srcData1 + index, v0, dummy); // f0 f2 f4 f6 ,f1 f3 f5 f7566v_load_deinterleave(srcData1 + index + 8, v1, dummy); // f8 f10 f12 f14 ,f9 f11 f13 f15567max_val0 = v_max(max_val0, v0);568max_val1 = v_max(max_val1, v1);569}570else571for (int k = 0; k < kernel_w*kernel_h; k++)572{573int index = ofsptr[k];574v_float32x4 v0(srcData1[index], srcData1[index + stride_w],575srcData1[index + stride_w*2], srcData1[index + stride_w*3]);576v_float32x4 v1(srcData1[index + stride_w*4], srcData1[index + stride_w*5],577srcData1[index + stride_w*6], srcData1[index + stride_w*7]);578max_val0 = v_max(max_val0, v0);579max_val1 = v_max(max_val1, v1);580}581}582else583{584for (int y = ystart; y < yend; ++y)585{586for (int x = xstart; x < xend; ++x)587{588const int index = y * inp_width + x;589v_float32x4 v0(srcData[index], srcData[index + stride_w],590srcData[index + stride_w*2], srcData[index + stride_w*3]);591v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],592srcData[index + stride_w*6], srcData[index + stride_w*7]);593max_val0 = v_max(max_val0, v0);594max_val1 = v_max(max_val1, v1);595}596}597}598v_store(dstData + x0, max_val0);599v_store(dstData + x0 + 4, max_val1);600x0 += 7;601}602}603else604#endif605{606float max_val = -FLT_MAX;607if( compMaxIdx )608{609int max_index = -1;610for (int y = ystart; y < yend; ++y)611for (int x = xstart; x < xend; ++x)612{613const int index = y * inp_width + x;614float val = srcData[index];615if (val > max_val)616{617max_val = val;618max_index = index;619}620}621622dstData[x0] = max_val;623if (dstMaskData)624dstMaskData[x0] = max_index;625}626else627{628for (int y = 
ystart; y < yend; ++y)629for (int x = xstart; x < xend; ++x)630{631const int index = y * inp_width + x;632float val = srcData[index];633max_val = std::max(max_val, val);634}635636dstData[x0] = max_val;637}638}639}640else if (poolingType == AVE)641{642for( ; x0 < x1; x0++ )643{644int xstart = x0 * stride_w - pad_l;645int xend = min(xstart + kernel_w, inp_width + pad_r);646int xdelta = xend - xstart;647xstart = max(xstart, 0);648xend = min(xend, inp_width);649float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));650inv_kernel_area = 1.0 / inv_kernel_area;651#if CV_SIMD128652if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )653{654v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32();655v_float32x4 ikarea = v_setall_f32(inv_kernel_area);656657for (int y = ystart; y < yend; ++y)658{659for (int x = xstart; x < xend; ++x)660{661const int index = y * inp_width + x;662v_float32x4 v0(srcData[index], srcData[index + stride_w],663srcData[index + stride_w*2], srcData[index + stride_w*3]);664v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],665srcData[index + stride_w*6], srcData[index + stride_w*7]);666sum_val0 += v0;667sum_val1 += v1;668}669}670v_store(dstData + x0, sum_val0*ikarea);671v_store(dstData + x0 + 4, sum_val1*ikarea);672x0 += 7;673}674else675#endif676{677float sum_val = 0.f;678for (int y = ystart; y < yend; ++y)679for (int x = xstart; x < xend; ++x)680{681const int index = y * inp_width + x;682float val = srcData[index];683sum_val += val;684}685686dstData[x0] = sum_val*inv_kernel_area;687}688}689}690else if (poolingType == ROI)691{692const float *roisData = rois->ptr<float>(n);693int xstartROI = roundRoiSize(roisData[1] * spatialScale);694int xendROI = roundRoiSize(roisData[3] * spatialScale);695int roiWidth = std::max(xendROI - xstartROI + 1, 1);696float roiRatio = (float)roiWidth / width;697for( ; x0 < x1; x0++ )698{699int xstart = xstartROI + x0 * 
roiRatio;700int xend = xstartROI + std::ceil((x0 + 1) * roiRatio);701xstart = max(xstart, 0);702xend = min(xend, inp_width);703if (xstart >= xend || ystart >= yend)704{705dstData[x0] = 0;706if (compMaxIdx && dstMaskData)707dstMaskData[x0] = -1;708continue;709}710float max_val = -FLT_MAX;711for (int y = ystart; y < yend; ++y)712for (int x = xstart; x < xend; ++x)713{714const int index = y * inp_width + x;715float val = srcData[index];716max_val = std::max(max_val, val);717}718dstData[x0] = max_val;719}720}721else // PSROI722{723const float *roisData = rois->ptr<float>(n);724CV_Assert(roisData[0] < src->size[0]);725float xstartROI = roundRoiSize(roisData[1]) * spatialScale;726float xendROI = roundRoiSize(roisData[3] + 1) * spatialScale;727float roiWidth = std::max(xendROI - xstartROI, 0.1f);728float roiRatio = roiWidth / width;729for( ; x0 < x1; x0++ )730{731int xstart = (int)std::floor(xstartROI + x0 * roiRatio);732int xend = (int)std::ceil(xstartROI + (x0 + 1) * roiRatio);733xstart = max(xstart, 0);734xend = min(xend, inp_width);735if (xstart >= xend || ystart >= yend)736{737dstData[x0] = 0;738continue;739}740741srcData = src->ptr<float>(roisData[0], (c * height + y0) * width + x0);742float sum_val = 0.f;743for (int y = ystart; y < yend; ++y)744for (int x = xstart; x < xend; ++x)745{746const int index = y * inp_width + x;747float val = srcData[index];748sum_val += val;749}750dstData[x0] = sum_val / ((yend - ystart) * (xend - xstart));751}752}753}754}755};756757void maxPooling(Mat &src, Mat &dst, Mat &mask)758{759const int nstripes = getNumThreads();760Mat rois;761PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);762}763764void avePooling(Mat &src, Mat &dst)765{766const int nstripes = getNumThreads();767Mat rois, mask;768PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, 
nstripes);769}770771void roiPooling(const Mat &src, const Mat &rois, Mat &dst)772{773const int nstripes = getNumThreads();774Mat mask;775PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);776}777778virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)779{780#ifdef HAVE_HALIDE781Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);782const int inWidth = inputBuffer.width();783const int inHeight = inputBuffer.height();784785Halide::Var x("x"), y("y"), c("c"), n("n");786Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));787Halide::RDom r(0, kernel.width, 0, kernel.height);788Halide::Expr kx, ky;789if(pad_l || pad_t)790{791kx = clamp(x * stride.width + r.x - pad_l, 0, inWidth - 1);792ky = clamp(y * stride.height + r.y - pad_t, 0, inHeight - 1);793}794else795{796kx = min(x * stride.width + r.x, inWidth - 1);797ky = min(y * stride.height + r.y, inHeight - 1);798}799800// Halide::argmax returns tuple (r.x, r.y, max).801Halide::Tuple res = argmax(inputBuffer(kx, ky, c, n));802803// Compute offset from argmax in range [0, kernel_size).804Halide::Expr max_index;805if(pad_l || pad_t)806{807max_index = clamp(y * stride.height + res[1] - pad_t,8080, inHeight - 1) * inWidth +809clamp(x * stride.width + res[0] - pad_l,8100, inWidth - 1);811}812else813{814max_index = min(y * stride.height + res[1], inHeight - 1) * inWidth +815min(x * stride.width + res[0], inWidth - 1);816}817top(x, y, c, n) = { res[2], Halide::cast<float>(max_index) };818return Ptr<BackendNode>(new HalideBackendNode(top));819#endif // HAVE_HALIDE820return Ptr<BackendNode>();821}822823virtual Ptr<BackendNode> initAvePoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)824{825#ifdef HAVE_HALIDE826Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);827828const int inW = inputBuffer.width(), inH = inputBuffer.height();829if ((inW - 
kernel.width) % stride.width || (inH - kernel.height) % stride.height)830{831CV_Error(cv::Error::StsNotImplemented,832"Halide backend for average pooling with partial "833"kernels is not implemented");834}835836const float norm = 1.0f / (kernel.width * kernel.height);837838Halide::Var x("x"), y("y"), c("c"), n("n");839Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));840Halide::RDom r(0, kernel.width, 0, kernel.height);841top(x, y, c, n) = sum(842inputBuffer(x * stride.width + r.x,843y * stride.height + r.y, c, n)) * norm;844return Ptr<BackendNode>(new HalideBackendNode(top));845#endif // HAVE_HALIDE846return Ptr<BackendNode>();847}848849virtual void applyHalideScheduler(Ptr<BackendNode>& node,850const std::vector<Mat*> &inputs,851const std::vector<Mat> &outputs,852int targetId) const CV_OVERRIDE853{854#ifdef HAVE_HALIDE855if (targetId != DNN_TARGET_CPU)856{857Layer::applyHalideScheduler(node, inputs, outputs, targetId);858return;859}860Halide::Var x("x"), y("y"), c("c"), n("n"), tile("tile"),861xi("xi"), yi("yi"), ci("ci"), xo("xo"), yo("yo"), co("co");862Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();863864int outW, outH, outC, outN;865getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);866867if (outW < 8 || outH < 8)868{869if (outC > 8)870top.split(c, co, ci, 8)871.fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)872.parallel(tile)873.vectorize(ci);874else875{876top.fuse(y, c, tile).fuse(n, tile, tile)877.parallel(tile);878if (outW > 1)879top.vectorize(x);880}881}882else883{884if (outC > 8)885top.split(x, xo, xi, 8).split(y, yo, yi, 8).split(c, co, ci, 8)886.fuse(xo, yo, tile).fuse(co, tile, tile).fuse(n, tile, tile)887.parallel(tile)888.vectorize(xi);889else890top.split(x, xo, xi, 8).split(y, yo, yi, 8)891.fuse(xo, yo, tile).fuse(c, tile, tile).fuse(n, tile, tile)892.parallel(tile)893.vectorize(xi);894}895#endif // HAVE_HALIDE896}897898bool getMemoryShapes(const std::vector<MatShape> 
&inputs,899const int requiredOutputs,900std::vector<MatShape> &outputs,901std::vector<MatShape> &internals) const CV_OVERRIDE902{903CV_Assert(inputs.size() != 0);904Size in(inputs[0][3], inputs[0][2]), out;905906if (globalPooling)907{908out.height = 1;909out.width = 1;910}911else if (type == ROI || type == PSROI)912{913out.height = pooledSize.height;914out.width = pooledSize.width;915}916else if (padMode.empty())917{918float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height;919float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width;920out.height = 1 + (ceilMode ? ceil(height) : floor(height));921out.width = 1 + (ceilMode ? ceil(width) : floor(width));922923if (pad_r || pad_b)924{925// If we have padding, ensure that the last pooling starts strictly926// inside the image (instead of at the padding); otherwise clip the last.927if ((out.height - 1) * stride.height >= in.height + pad_b)928--out.height;929if ((out.width - 1) * stride.width >= in.width + pad_r)930--out.width;931CV_Assert((out.height - 1) * stride.height < in.height + pad_b);932CV_Assert((out.width - 1) * stride.width < in.width + pad_r);933}934}935else936{937getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);938}939940int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width};941if (type == ROI)942{943CV_Assert(inputs.size() == 2);944dims[0] = inputs[1][0]; // Number of proposals;945}946else if (type == PSROI)947{948CV_Assert(inputs.size() == 2);949CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]);950dims[0] = inputs[1][0]; // Number of proposals;951dims[1] = psRoiOutChannels;952}953outputs.assign(type == MAX ? 
2 : 1, shape(dims, 4));954955return false;956}957958virtual int64 getFLOPS(const std::vector<MatShape> &inputs,959const std::vector<MatShape> &outputs) const CV_OVERRIDE960{961CV_UNUSED(inputs); // suppress unused variable warning962long flops = 0;963964for(int i = 0; i < outputs.size(); i++)965{966if (type == MAX)967{968if (i%2 == 0)969flops += total(outputs[i])*kernel.area();970}971else972{973flops += total(outputs[i])*(kernel.area() + 1);974}975}976return flops;977}978private:979enum Type980{981MAX,982AVE,983STOCHASTIC,984ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf985PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf986};987};988989Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)990{991return Ptr<PoolingLayer>(new PoolingLayerImpl(params));992}993994}995}996997998