// Source: OpenCV DNN module — modules/dnn/src/layers/pooling_layer.cpp
// (reconstructed from a web mirror; navigation chrome removed)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
42
43
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../op_vkcom.hpp"
#include <float.h>
#include <algorithm>
using std::max;
using std::min;

#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
58
59
namespace cv
60
{
61
namespace dnn
62
{
63
static inline int roundRoiSize(float v)
64
{
65
return (int)(v + (v >= 0.f ? 0.5f : -0.5f));
66
}
67
68
class PoolingLayerImpl CV_FINAL : public PoolingLayer
69
{
70
public:
71
PoolingLayerImpl(const LayerParams& params)
72
{
73
computeMaxIdx = true;
74
globalPooling = false;
75
stride = Size(1, 1);
76
77
if (params.has("pool") || params.has("kernel_size") ||
78
params.has("kernel_w") || params.has("kernel_h"))
79
{
80
String pool = toLowerCase(params.get<String>("pool", "max"));
81
if (pool == "max")
82
type = MAX;
83
else if (pool == "ave")
84
type = AVE;
85
else if (pool == "stochastic")
86
type = STOCHASTIC;
87
else
88
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
89
90
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
91
pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode);
92
93
pad.width = pad_l;
94
pad.height = pad_t;
95
}
96
else if (params.has("pooled_w") || params.has("pooled_h"))
97
{
98
type = ROI;
99
computeMaxIdx = false;
100
pooledSize.width = params.get<uint32_t>("pooled_w", 1);
101
pooledSize.height = params.get<uint32_t>("pooled_h", 1);
102
}
103
else if (params.has("output_dim") && params.has("group_size"))
104
{
105
type = PSROI;
106
pooledSize.width = params.get<int>("group_size");
107
pooledSize.height = pooledSize.width;
108
psRoiOutChannels = params.get<int>("output_dim");
109
}
110
else
111
CV_Error(Error::StsBadArg, "Cannot determine pooling type");
112
setParamsFrom(params);
113
ceilMode = params.get<bool>("ceil_mode", true);
114
spatialScale = params.get<float>("spatial_scale", 1);
115
avePoolPaddedArea = params.get<bool>("ave_pool_padded_area", true);
116
}
117
118
#ifdef HAVE_OPENCL
119
Ptr<OCL4DNNPool<float> > poolOp;
120
#endif
121
122
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
123
{
124
std::vector<Mat> inputs, outputs;
125
inputs_arr.getMatVector(inputs);
126
outputs_arr.getMatVector(outputs);
127
128
CV_Assert(!inputs.empty());
129
130
cv::Size inp(inputs[0].size[3], inputs[0].size[2]),
131
out(outputs[0].size[3], outputs[0].size[2]);
132
133
if(globalPooling)
134
{
135
kernel = inp;
136
}
137
138
getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r);
139
pad.width = pad_l;
140
pad.height = pad_t;
141
142
#ifdef HAVE_OPENCL
143
poolOp.release();
144
#endif
145
}
146
147
virtual bool supportBackend(int backendId) CV_OVERRIDE
148
{
149
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
150
{
151
if (preferableTarget == DNN_TARGET_MYRIAD)
152
return type == MAX || type == AVE;
153
else
154
return type != STOCHASTIC;
155
}
156
else
157
return backendId == DNN_BACKEND_OPENCV ||
158
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
159
(type == MAX || type == AVE && !pad_t && !pad_l && !pad_b && !pad_r) ||
160
backendId == DNN_BACKEND_VKCOM && haveVulkan() &&
161
(type == MAX || type == AVE);
162
}
163
164
#ifdef HAVE_OPENCL
165
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
166
{
167
std::vector<UMat> inputs;
168
std::vector<UMat> outputs;
169
170
bool use_half = (inps.depth() == CV_16S);
171
inps.getUMatVector(inputs);
172
outs.getUMatVector(outputs);
173
174
if (poolOp.empty())
175
{
176
OCL4DNNPoolConfig config;
177
178
config.in_shape = shape(inputs[0]);
179
config.out_shape = shape(outputs[0]);
180
config.kernel = kernel;
181
config.pad_l = pad_l;
182
config.pad_t = pad_t;
183
config.pad_r = pad_r;
184
config.pad_b = pad_b;
185
config.stride = stride;
186
config.channels = inputs[0].size[1];
187
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
188
(type == AVE ? LIBDNN_POOLING_METHOD_AVE :
189
LIBDNN_POOLING_METHOD_STO);
190
config.avePoolPaddedArea = avePoolPaddedArea;
191
config.computeMaxIdx = computeMaxIdx;
192
config.use_half = use_half;
193
poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
194
}
195
196
for (size_t ii = 0; ii < inputs.size(); ii++)
197
{
198
UMat& inpMat = inputs[ii];
199
int out_index = (type == MAX) ? 2 : 1;
200
UMat& outMat = outputs[out_index * ii];
201
UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
202
203
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
204
205
if (!poolOp->Forward(inpMat, outMat, maskMat))
206
return false;
207
}
208
return true;
209
}
210
#endif
211
212
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
213
{
214
CV_TRACE_FUNCTION();
215
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
216
217
if (type == MAX || type == AVE || type == STOCHASTIC)
218
{
219
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
220
forward_ocl(inputs_arr, outputs_arr, internals_arr))
221
}
222
if (inputs_arr.depth() == CV_16S)
223
{
224
forward_fallback(inputs_arr, outputs_arr, internals_arr);
225
return;
226
}
227
228
std::vector<Mat> inputs, outputs;
229
inputs_arr.getMatVector(inputs);
230
outputs_arr.getMatVector(outputs);
231
232
switch (type)
233
{
234
case MAX:
235
CV_Assert_N(inputs.size() == 1, outputs.size() == 2);
236
maxPooling(inputs[0], outputs[0], outputs[1]);
237
break;
238
case AVE:
239
CV_Assert_N(inputs.size() == 1, outputs.size() == 1);
240
avePooling(inputs[0], outputs[0]);
241
break;
242
case ROI: case PSROI:
243
CV_Assert_N(inputs.size() == 2, outputs.size() == 1);
244
roiPooling(inputs[0], inputs[1], outputs[0]);
245
break;
246
default:
247
CV_Error(Error::StsNotImplemented, "Not implemented");
248
break;
249
}
250
}
251
252
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
253
{
254
#ifdef HAVE_VULKAN
255
int padding_mode;
256
vkcom::PoolType pool_type;
257
int filter_size[2] = {kernel.height, kernel.width};
258
int pad_size[2] = {pad.height, pad.width};
259
int stride_size[2] = {stride.height, stride.width};
260
pool_type = type == MAX ? vkcom::kPoolTypeMax:
261
(type == AVE ? vkcom::kPoolTypeAvg:
262
vkcom::kPoolTypeNum);
263
264
if (padMode.empty())
265
{
266
padding_mode = vkcom::kPaddingModeCaffe;
267
}
268
else if (padMode == "VALID")
269
{
270
padding_mode = vkcom::kPaddingModeValid;
271
}
272
else if (padMode == "SAME")
273
{
274
padding_mode = vkcom::kPaddingModeSame;
275
}
276
else
277
CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
278
279
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPool(filter_size, pad_size,
280
stride_size, padding_mode,
281
pool_type, avePoolPaddedArea));
282
return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
283
#endif
284
return Ptr<BackendNode>();
285
}
286
287
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
288
{
289
if (type == MAX)
290
return initMaxPoolingHalide(inputs);
291
else if (type == AVE)
292
return initAvePoolingHalide(inputs);
293
else
294
return Ptr<BackendNode>();
295
}
296
297
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
298
{
299
#ifdef HAVE_INF_ENGINE
300
InferenceEngine::LayerParams lp;
301
lp.name = name;
302
lp.precision = InferenceEngine::Precision::FP32;
303
304
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer;
305
if (type == MAX || type == AVE)
306
{
307
lp.type = "Pooling";
308
InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp);
309
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R3)
310
poolLayer->_kernel.insert(InferenceEngine::X_AXIS, kernel.width);
311
poolLayer->_kernel.insert(InferenceEngine::Y_AXIS, kernel.height);
312
poolLayer->_stride.insert(InferenceEngine::X_AXIS, stride.width);
313
poolLayer->_stride.insert(InferenceEngine::Y_AXIS, stride.height);
314
poolLayer->_padding.insert(InferenceEngine::X_AXIS, pad_l);
315
poolLayer->_padding.insert(InferenceEngine::Y_AXIS, pad_t);
316
poolLayer->_pads_end.insert(InferenceEngine::X_AXIS, pad_r);
317
poolLayer->_pads_end.insert(InferenceEngine::Y_AXIS, pad_b);
318
#else
319
poolLayer->_kernel_x = kernel.width;
320
poolLayer->_kernel_y = kernel.height;
321
poolLayer->_stride_x = stride.width;
322
poolLayer->_stride_y = stride.height;
323
poolLayer->_padding_x = pad_l;
324
poolLayer->_padding_y = pad_t;
325
poolLayer->params["pad-r"] = format("%d", pad_r);
326
poolLayer->params["pad-b"] = format("%d", pad_b);
327
#endif
328
poolLayer->_exclude_pad = type == AVE && padMode == "SAME";
329
poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
330
poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :
331
InferenceEngine::PoolingLayer::PoolType::AVG;
332
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(poolLayer);
333
}
334
else if (type == ROI)
335
{
336
lp.type = "ROIPooling";
337
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));
338
ieLayer->params["pooled_w"] = format("%d", pooledSize.width);
339
ieLayer->params["pooled_h"] = format("%d", pooledSize.height);
340
ieLayer->params["spatial_scale"] = format("%f", spatialScale);
341
}
342
else if (type == PSROI)
343
{
344
lp.type = "PSROIPooling";
345
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));
346
ieLayer->params["output_dim"] = format("%d", psRoiOutChannels);
347
ieLayer->params["group_size"] = format("%d", pooledSize.width);
348
ieLayer->params["spatial_scale"] = format("%f", spatialScale);
349
}
350
else
351
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
352
353
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
354
#endif // HAVE_INF_ENGINE
355
return Ptr<BackendNode>();
356
}
357
358
359
class PoolingInvoker : public ParallelLoopBody
360
{
361
public:
362
const Mat* src, *rois;
363
Mat *dst, *mask;
364
Size kernel, stride;
365
int pad_l, pad_t, pad_r, pad_b;
366
bool avePoolPaddedArea;
367
int nstripes;
368
bool computeMaxIdx;
369
std::vector<int> ofsbuf;
370
int poolingType;
371
float spatialScale;
372
373
PoolingInvoker() : src(0), rois(0), dst(0), mask(0), pad_l(0), pad_t(0), pad_r(0), pad_b(0),
374
avePoolPaddedArea(false), nstripes(0),
375
computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
376
377
static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
378
Size stride, int pad_l, int pad_t, int pad_r, int pad_b, bool avePoolPaddedArea, int poolingType, float spatialScale,
379
bool computeMaxIdx, int nstripes)
380
{
381
CV_Assert_N(
382
src.isContinuous(), dst.isContinuous(),
383
src.type() == CV_32F, src.type() == dst.type(),
384
src.dims == 4, dst.dims == 4,
385
((poolingType == ROI || poolingType == PSROI) && dst.size[0] ==rois.size[0] || src.size[0] == dst.size[0]),
386
poolingType == PSROI || src.size[1] == dst.size[1],
387
(mask.empty() || (mask.type() == src.type() && mask.size == dst.size)));
388
389
PoolingInvoker p;
390
391
p.src = &src;
392
p.rois = &rois;
393
p.dst = &dst;
394
p.mask = &mask;
395
p.kernel = kernel;
396
p.stride = stride;
397
p.pad_l = pad_l;
398
p.pad_t = pad_t;
399
p.pad_r = pad_r;
400
p.pad_b = pad_b;
401
p.avePoolPaddedArea = avePoolPaddedArea;
402
p.nstripes = nstripes;
403
p.computeMaxIdx = computeMaxIdx;
404
p.poolingType = poolingType;
405
p.spatialScale = spatialScale;
406
407
if( !computeMaxIdx )
408
{
409
p.ofsbuf.resize(kernel.width*kernel.height);
410
for( int i = 0; i < kernel.height; i++ )
411
for( int j = 0; j < kernel.width; j++ )
412
p.ofsbuf[i*kernel.width + j] = src.size[3]*i + j;
413
}
414
415
parallel_for_(Range(0, nstripes), p, nstripes);
416
}
417
418
void operator()(const Range& r) const CV_OVERRIDE
419
{
420
int channels = dst->size[1], width = dst->size[3], height = dst->size[2];
421
int inp_width = src->size[3], inp_height = src->size[2];
422
size_t total = dst->total();
423
size_t stripeSize = (total + nstripes - 1)/nstripes;
424
size_t stripeStart = r.start*stripeSize;
425
size_t stripeEnd = std::min(r.end*stripeSize, total);
426
int kernel_w = kernel.width, kernel_h = kernel.height;
427
int stride_w = stride.width, stride_h = stride.height;
428
bool compMaxIdx = computeMaxIdx;
429
430
#if CV_SIMD128
431
const int* ofsptr = ofsbuf.empty() ? 0 : (const int*)&ofsbuf[0];
432
if (poolingType == MAX && !compMaxIdx && !ofsptr)
433
CV_Error(Error::StsBadArg, "ofsbuf should be initialized in this mode");
434
v_float32x4 idx00(0.f, (float)stride_w, (float)(stride_w*2), (float)(stride_w*3));
435
v_float32x4 ones = v_setall_f32(1.f);
436
v_float32x4 idx_delta = v_setall_f32((float)(inp_width - kernel_w));
437
#endif
438
439
for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
440
{
441
size_t ofs = ofs0;
442
int x0 = (int)(ofs % width);
443
ofs /= width;
444
int y0 = (int)(ofs % height);
445
ofs /= height;
446
int c = (int)(ofs % channels);
447
int n = (int)(ofs / channels);
448
int ystart, yend;
449
450
const float *srcData = 0;
451
if (poolingType == ROI)
452
{
453
const float *roisData = rois->ptr<float>(n);
454
int ystartROI = roundRoiSize(roisData[2] * spatialScale);
455
int yendROI = roundRoiSize(roisData[4] * spatialScale);
456
int roiHeight = std::max(yendROI - ystartROI + 1, 1);
457
float roiRatio = (float)roiHeight / height;
458
459
ystart = ystartROI + y0 * roiRatio;
460
yend = ystartROI + std::ceil((y0 + 1) * roiRatio);
461
462
CV_Assert(roisData[0] < src->size[0]);
463
srcData = src->ptr<float>(roisData[0], c);
464
}
465
else if (poolingType == PSROI)
466
{
467
const float *roisData = rois->ptr<float>(n);
468
float ystartROI = roundRoiSize(roisData[2]) * spatialScale;
469
float yendROI = roundRoiSize(roisData[4] + 1) * spatialScale;
470
float roiHeight = std::max(yendROI - ystartROI, 0.1f);
471
float roiRatio = roiHeight / height;
472
473
ystart = (int)std::floor(ystartROI + y0 * roiRatio);
474
yend = (int)std::ceil(ystartROI + (y0 + 1) * roiRatio);
475
}
476
else
477
{
478
ystart = y0 * stride_h - pad_t;
479
yend = min(ystart + kernel_h, inp_height + pad_b);
480
srcData = src->ptr<float>(n, c);
481
}
482
int ydelta = yend - ystart;
483
ystart = max(ystart, 0);
484
yend = min(yend, inp_height);
485
float *dstData = dst->ptr<float>(n, c, y0);
486
float *dstMaskData = mask->data ? mask->ptr<float>(n, c, y0) : 0;
487
488
int delta = std::min((int)(stripeEnd - ofs0), width - x0);
489
ofs0 += delta;
490
int x1 = x0 + delta;
491
492
if( poolingType == MAX)
493
for( ; x0 < x1; x0++ )
494
{
495
int xstart = x0 * stride_w - pad_l;
496
int xend = min(xstart + kernel_w, inp_width);
497
xstart = max(xstart, 0);
498
if (xstart >= xend || ystart >= yend)
499
{
500
dstData[x0] = 0;
501
if (compMaxIdx && dstMaskData)
502
dstMaskData[x0] = -1;
503
continue;
504
}
505
#if CV_SIMD128
506
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
507
{
508
if( compMaxIdx )
509
{
510
v_float32x4 max_val0 = v_setall_f32(-FLT_MAX);
511
v_float32x4 max_val1 = max_val0;
512
v_float32x4 max_idx0 = v_setall_f32(-1.f);
513
v_float32x4 max_idx1 = max_idx0;
514
int index0 = ystart * inp_width + xstart;
515
v_float32x4 idx0 = idx00 + v_setall_f32((float)index0);
516
v_float32x4 idx1 = idx0 + v_setall_f32((float)(stride_w*4));
517
518
for (int y = ystart; y < yend; ++y)
519
{
520
for (int x = xstart; x < xend; ++x, idx0 += ones, idx1 += ones)
521
{
522
const int index = y * inp_width + x;
523
v_float32x4 v0(srcData[index], srcData[index + stride_w],
524
srcData[index + stride_w*2], srcData[index + stride_w*3]);
525
v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],
526
srcData[index + stride_w*6], srcData[index + stride_w*7]);
527
max_idx0 = v_select(v0 > max_val0, idx0, max_idx0);
528
max_idx1 = v_select(v1 > max_val1, idx1, max_idx1);
529
max_val0 = v_max(max_val0, v0);
530
max_val1 = v_max(max_val1, v1);
531
}
532
idx0 += idx_delta;
533
idx1 += idx_delta;
534
}
535
v_store(dstData + x0, max_val0);
536
v_store(dstData + x0 + 4, max_val1);
537
if (dstMaskData)
538
{
539
v_store(dstMaskData + x0, max_idx0);
540
v_store(dstMaskData + x0 + 4, max_idx1);
541
}
542
x0 += 7;
543
}
544
else
545
{
546
v_float32x4 max_val0 = v_setall_f32(-FLT_MAX);
547
v_float32x4 max_val1 = max_val0;
548
549
if( yend - ystart == kernel_h )
550
{
551
const float* srcData1 = srcData + ystart*inp_width + xstart;
552
if( stride_w == 1 )
553
for (int k = 0; k < kernel_w*kernel_h; k++)
554
{
555
int index = ofsptr[k];
556
v_float32x4 v0 = v_load(srcData1 + index);
557
v_float32x4 v1 = v_load(srcData1 + index + 4);
558
max_val0 = v_max(max_val0, v0);
559
max_val1 = v_max(max_val1, v1);
560
}
561
else if( stride_w == 2 )
562
for (int k = 0; k < kernel_w*kernel_h; k++)
563
{
564
int index = ofsptr[k];
565
v_float32x4 v0, v1, dummy;
566
v_load_deinterleave(srcData1 + index, v0, dummy); // f0 f2 f4 f6 ,f1 f3 f5 f7
567
v_load_deinterleave(srcData1 + index + 8, v1, dummy); // f8 f10 f12 f14 ,f9 f11 f13 f15
568
max_val0 = v_max(max_val0, v0);
569
max_val1 = v_max(max_val1, v1);
570
}
571
else
572
for (int k = 0; k < kernel_w*kernel_h; k++)
573
{
574
int index = ofsptr[k];
575
v_float32x4 v0(srcData1[index], srcData1[index + stride_w],
576
srcData1[index + stride_w*2], srcData1[index + stride_w*3]);
577
v_float32x4 v1(srcData1[index + stride_w*4], srcData1[index + stride_w*5],
578
srcData1[index + stride_w*6], srcData1[index + stride_w*7]);
579
max_val0 = v_max(max_val0, v0);
580
max_val1 = v_max(max_val1, v1);
581
}
582
}
583
else
584
{
585
for (int y = ystart; y < yend; ++y)
586
{
587
for (int x = xstart; x < xend; ++x)
588
{
589
const int index = y * inp_width + x;
590
v_float32x4 v0(srcData[index], srcData[index + stride_w],
591
srcData[index + stride_w*2], srcData[index + stride_w*3]);
592
v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],
593
srcData[index + stride_w*6], srcData[index + stride_w*7]);
594
max_val0 = v_max(max_val0, v0);
595
max_val1 = v_max(max_val1, v1);
596
}
597
}
598
}
599
v_store(dstData + x0, max_val0);
600
v_store(dstData + x0 + 4, max_val1);
601
x0 += 7;
602
}
603
}
604
else
605
#endif
606
{
607
float max_val = -FLT_MAX;
608
if( compMaxIdx )
609
{
610
int max_index = -1;
611
for (int y = ystart; y < yend; ++y)
612
for (int x = xstart; x < xend; ++x)
613
{
614
const int index = y * inp_width + x;
615
float val = srcData[index];
616
if (val > max_val)
617
{
618
max_val = val;
619
max_index = index;
620
}
621
}
622
623
dstData[x0] = max_val;
624
if (dstMaskData)
625
dstMaskData[x0] = max_index;
626
}
627
else
628
{
629
for (int y = ystart; y < yend; ++y)
630
for (int x = xstart; x < xend; ++x)
631
{
632
const int index = y * inp_width + x;
633
float val = srcData[index];
634
max_val = std::max(max_val, val);
635
}
636
637
dstData[x0] = max_val;
638
}
639
}
640
}
641
else if (poolingType == AVE)
642
{
643
for( ; x0 < x1; x0++ )
644
{
645
int xstart = x0 * stride_w - pad_l;
646
int xend = min(xstart + kernel_w, inp_width + pad_r);
647
int xdelta = xend - xstart;
648
xstart = max(xstart, 0);
649
xend = min(xend, inp_width);
650
float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));
651
inv_kernel_area = 1.0 / inv_kernel_area;
652
#if CV_SIMD128
653
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
654
{
655
v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32();
656
v_float32x4 ikarea = v_setall_f32(inv_kernel_area);
657
658
for (int y = ystart; y < yend; ++y)
659
{
660
for (int x = xstart; x < xend; ++x)
661
{
662
const int index = y * inp_width + x;
663
v_float32x4 v0(srcData[index], srcData[index + stride_w],
664
srcData[index + stride_w*2], srcData[index + stride_w*3]);
665
v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],
666
srcData[index + stride_w*6], srcData[index + stride_w*7]);
667
sum_val0 += v0;
668
sum_val1 += v1;
669
}
670
}
671
v_store(dstData + x0, sum_val0*ikarea);
672
v_store(dstData + x0 + 4, sum_val1*ikarea);
673
x0 += 7;
674
}
675
else
676
#endif
677
{
678
float sum_val = 0.f;
679
for (int y = ystart; y < yend; ++y)
680
for (int x = xstart; x < xend; ++x)
681
{
682
const int index = y * inp_width + x;
683
float val = srcData[index];
684
sum_val += val;
685
}
686
687
dstData[x0] = sum_val*inv_kernel_area;
688
}
689
}
690
}
691
else if (poolingType == ROI)
692
{
693
const float *roisData = rois->ptr<float>(n);
694
int xstartROI = roundRoiSize(roisData[1] * spatialScale);
695
int xendROI = roundRoiSize(roisData[3] * spatialScale);
696
int roiWidth = std::max(xendROI - xstartROI + 1, 1);
697
float roiRatio = (float)roiWidth / width;
698
for( ; x0 < x1; x0++ )
699
{
700
int xstart = xstartROI + x0 * roiRatio;
701
int xend = xstartROI + std::ceil((x0 + 1) * roiRatio);
702
xstart = max(xstart, 0);
703
xend = min(xend, inp_width);
704
if (xstart >= xend || ystart >= yend)
705
{
706
dstData[x0] = 0;
707
if (compMaxIdx && dstMaskData)
708
dstMaskData[x0] = -1;
709
continue;
710
}
711
float max_val = -FLT_MAX;
712
for (int y = ystart; y < yend; ++y)
713
for (int x = xstart; x < xend; ++x)
714
{
715
const int index = y * inp_width + x;
716
float val = srcData[index];
717
max_val = std::max(max_val, val);
718
}
719
dstData[x0] = max_val;
720
}
721
}
722
else // PSROI
723
{
724
const float *roisData = rois->ptr<float>(n);
725
CV_Assert(roisData[0] < src->size[0]);
726
float xstartROI = roundRoiSize(roisData[1]) * spatialScale;
727
float xendROI = roundRoiSize(roisData[3] + 1) * spatialScale;
728
float roiWidth = std::max(xendROI - xstartROI, 0.1f);
729
float roiRatio = roiWidth / width;
730
for( ; x0 < x1; x0++ )
731
{
732
int xstart = (int)std::floor(xstartROI + x0 * roiRatio);
733
int xend = (int)std::ceil(xstartROI + (x0 + 1) * roiRatio);
734
xstart = max(xstart, 0);
735
xend = min(xend, inp_width);
736
if (xstart >= xend || ystart >= yend)
737
{
738
dstData[x0] = 0;
739
continue;
740
}
741
742
srcData = src->ptr<float>(roisData[0], (c * height + y0) * width + x0);
743
float sum_val = 0.f;
744
for (int y = ystart; y < yend; ++y)
745
for (int x = xstart; x < xend; ++x)
746
{
747
const int index = y * inp_width + x;
748
float val = srcData[index];
749
sum_val += val;
750
}
751
dstData[x0] = sum_val / ((yend - ystart) * (xend - xstart));
752
}
753
}
754
}
755
}
756
};
757
758
void maxPooling(Mat &src, Mat &dst, Mat &mask)
759
{
760
const int nstripes = getNumThreads();
761
Mat rois;
762
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
763
}
764
765
void avePooling(Mat &src, Mat &dst)
766
{
767
const int nstripes = getNumThreads();
768
Mat rois, mask;
769
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
770
}
771
772
void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
773
{
774
const int nstripes = getNumThreads();
775
Mat mask;
776
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
777
}
778
779
virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
780
{
781
#ifdef HAVE_HALIDE
782
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
783
const int inWidth = inputBuffer.width();
784
const int inHeight = inputBuffer.height();
785
786
Halide::Var x("x"), y("y"), c("c"), n("n");
787
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
788
Halide::RDom r(0, kernel.width, 0, kernel.height);
789
Halide::Expr kx, ky;
790
if(pad_l || pad_t)
791
{
792
kx = clamp(x * stride.width + r.x - pad_l, 0, inWidth - 1);
793
ky = clamp(y * stride.height + r.y - pad_t, 0, inHeight - 1);
794
}
795
else
796
{
797
kx = min(x * stride.width + r.x, inWidth - 1);
798
ky = min(y * stride.height + r.y, inHeight - 1);
799
}
800
801
// Halide::argmax returns tuple (r.x, r.y, max).
802
Halide::Tuple res = argmax(inputBuffer(kx, ky, c, n));
803
804
// Compute offset from argmax in range [0, kernel_size).
805
Halide::Expr max_index;
806
if(pad_l || pad_t)
807
{
808
max_index = clamp(y * stride.height + res[1] - pad_t,
809
0, inHeight - 1) * inWidth +
810
clamp(x * stride.width + res[0] - pad_l,
811
0, inWidth - 1);
812
}
813
else
814
{
815
max_index = min(y * stride.height + res[1], inHeight - 1) * inWidth +
816
min(x * stride.width + res[0], inWidth - 1);
817
}
818
top(x, y, c, n) = { res[2], Halide::cast<float>(max_index) };
819
return Ptr<BackendNode>(new HalideBackendNode(top));
820
#endif // HAVE_HALIDE
821
return Ptr<BackendNode>();
822
}
823
824
virtual Ptr<BackendNode> initAvePoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
825
{
826
#ifdef HAVE_HALIDE
827
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
828
829
const int inW = inputBuffer.width(), inH = inputBuffer.height();
830
if ((inW - kernel.width) % stride.width || (inH - kernel.height) % stride.height)
831
{
832
CV_Error(cv::Error::StsNotImplemented,
833
"Halide backend for average pooling with partial "
834
"kernels is not implemented");
835
}
836
837
const float norm = 1.0f / (kernel.width * kernel.height);
838
839
Halide::Var x("x"), y("y"), c("c"), n("n");
840
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
841
Halide::RDom r(0, kernel.width, 0, kernel.height);
842
top(x, y, c, n) = sum(
843
inputBuffer(x * stride.width + r.x,
844
y * stride.height + r.y, c, n)) * norm;
845
return Ptr<BackendNode>(new HalideBackendNode(top));
846
#endif // HAVE_HALIDE
847
return Ptr<BackendNode>();
848
}
849
850
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
851
const std::vector<Mat*> &inputs,
852
const std::vector<Mat> &outputs,
853
int targetId) const CV_OVERRIDE
854
{
855
#ifdef HAVE_HALIDE
856
if (targetId != DNN_TARGET_CPU)
857
{
858
Layer::applyHalideScheduler(node, inputs, outputs, targetId);
859
return;
860
}
861
Halide::Var x("x"), y("y"), c("c"), n("n"), tile("tile"),
862
xi("xi"), yi("yi"), ci("ci"), xo("xo"), yo("yo"), co("co");
863
Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
864
865
int outW, outH, outC, outN;
866
getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
867
868
if (outW < 8 || outH < 8)
869
{
870
if (outC > 8)
871
top.split(c, co, ci, 8)
872
.fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
873
.parallel(tile)
874
.vectorize(ci);
875
else
876
{
877
top.fuse(y, c, tile).fuse(n, tile, tile)
878
.parallel(tile);
879
if (outW > 1)
880
top.vectorize(x);
881
}
882
}
883
else
884
{
885
if (outC > 8)
886
top.split(x, xo, xi, 8).split(y, yo, yi, 8).split(c, co, ci, 8)
887
.fuse(xo, yo, tile).fuse(co, tile, tile).fuse(n, tile, tile)
888
.parallel(tile)
889
.vectorize(xi);
890
else
891
top.split(x, xo, xi, 8).split(y, yo, yi, 8)
892
.fuse(xo, yo, tile).fuse(c, tile, tile).fuse(n, tile, tile)
893
.parallel(tile)
894
.vectorize(xi);
895
}
896
#endif // HAVE_HALIDE
897
}
898
899
bool getMemoryShapes(const std::vector<MatShape> &inputs,
900
const int requiredOutputs,
901
std::vector<MatShape> &outputs,
902
std::vector<MatShape> &internals) const CV_OVERRIDE
903
{
904
CV_Assert(inputs.size() != 0);
905
Size in(inputs[0][3], inputs[0][2]), out;
906
907
if (globalPooling)
908
{
909
out.height = 1;
910
out.width = 1;
911
}
912
else if (type == ROI || type == PSROI)
913
{
914
out.height = pooledSize.height;
915
out.width = pooledSize.width;
916
}
917
else if (padMode.empty())
918
{
919
float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height;
920
float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width;
921
out.height = 1 + (ceilMode ? ceil(height) : floor(height));
922
out.width = 1 + (ceilMode ? ceil(width) : floor(width));
923
924
if (pad_r || pad_b)
925
{
926
// If we have padding, ensure that the last pooling starts strictly
927
// inside the image (instead of at the padding); otherwise clip the last.
928
if ((out.height - 1) * stride.height >= in.height + pad_b)
929
--out.height;
930
if ((out.width - 1) * stride.width >= in.width + pad_r)
931
--out.width;
932
CV_Assert((out.height - 1) * stride.height < in.height + pad_b);
933
CV_Assert((out.width - 1) * stride.width < in.width + pad_r);
934
}
935
}
936
else
937
{
938
getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
939
}
940
941
int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width};
942
if (type == ROI)
943
{
944
CV_Assert(inputs.size() == 2);
945
dims[0] = inputs[1][0]; // Number of proposals;
946
}
947
else if (type == PSROI)
948
{
949
CV_Assert(inputs.size() == 2);
950
CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]);
951
dims[0] = inputs[1][0]; // Number of proposals;
952
dims[1] = psRoiOutChannels;
953
}
954
outputs.assign(type == MAX ? 2 : 1, shape(dims, 4));
955
956
return false;
957
}
958
959
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
960
const std::vector<MatShape> &outputs) const CV_OVERRIDE
961
{
962
CV_UNUSED(inputs); // suppress unused variable warning
963
long flops = 0;
964
965
for(int i = 0; i < outputs.size(); i++)
966
{
967
if (type == MAX)
968
{
969
if (i%2 == 0)
970
flops += total(outputs[i])*kernel.area();
971
}
972
else
973
{
974
flops += total(outputs[i])*(kernel.area() + 1);
975
}
976
}
977
return flops;
978
}
979
private:
980
enum Type
981
{
982
MAX,
983
AVE,
984
STOCHASTIC,
985
ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
986
PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
987
};
988
};
989
990
Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
991
{
992
return Ptr<PoolingLayer>(new PoolingLayerImpl(params));
993
}
994
995
}
996
}
997
998