Path: blob/master/modules/dnn/src/opencl/ocl4dnn_pooling.cl
16337 views
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or
otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

// Token-pasting helpers: TEMPLATE(name, type) expands to name_type. The extra
// level of indirection through CONCAT lets the arguments be macro-expanded
// before they are pasted together.
#define CONCAT(A,B) A##_##B
#define TEMPLATE(name,type) CONCAT(name,type)

#if defined(cl_khr_fp16)
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif

// The kernels below rely on names that are not defined in this file and must
// be provided when the program is built: Dtype, KERNEL_H/KERNEL_W,
// STRIDE_H/STRIDE_W, PAD_T/PAD_L/PAD_B/PAD_R, and exactly one of
// KERNEL_MAX_POOL / KERNEL_AVE_POOL / KERNEL_STO_POOL to select a kernel.

#if defined KERNEL_MAX_POOL

// Max pooling, forward pass. Each work-item whose global id is below
// `nthreads` produces one output element.
//
//   nthreads       number of output elements to compute
//   bottom_data    input; read as consecutive planes of height * width elements
//   channels       not referenced by this kernel
//   height, width  size of one input plane
//   pooled_height, pooled_width
//                  size of one output plane
//   top_data       output; element `index` receives the window maximum
//   mask           (HAVE_MASK only) element `index` receives the in-plane
//                  offset (y * width + x) of the maximum, stored as Dtype
//
// Window positions that fall outside the input plane are skipped. If no
// position is inside the plane the output stays at -FLT_MAX and the mask at -1.
__kernel void
#ifdef HAVE_MASK
TEMPLATE(max_pool_forward_mask, Dtype)
#else
TEMPLATE(max_pool_forward, Dtype)
#endif
(
    const int nthreads, __global const Dtype* bottom_data,
    const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width,
    __global Dtype* top_data
#ifdef HAVE_MASK
    , __global Dtype* mask
#endif
)
{
    int index = get_global_id(0);
    if (index >= nthreads)
        return;

    // Decompose the flat output index into (plane, row, column). `ch` is the
    // plane index: everything left after removing the row and column.
    const int pw = index % pooled_width;
    const int xx = index / pooled_width;
    const int ph = xx % pooled_height;
    const int ch = xx / pooled_height;

    // Top-left corner of the window in input coordinates; negative when the
    // window starts inside the top/left padding.
    int hstart = ph * STRIDE_H - PAD_T;
    int wstart = pw * STRIDE_W - PAD_L;

    Dtype maxval = -FLT_MAX;
    int maxidx = -1;
    int in_offset = ch * height * width;
    for (int h = 0; h < KERNEL_H; ++h)
    {
        int off_y = hstart + h;
        if (off_y >= 0 && off_y < height)
        {
            for (int w = 0; w < KERNEL_W; ++w)
            {
                int off_x = wstart + w;
                if (off_x >= 0 && off_x < width)
                {
                    Dtype val = bottom_data[in_offset + off_y * width + off_x];
                    // Strict '>' keeps the first position on ties. The index
                    // must be updated before maxval is overwritten.
                    maxidx = (val > maxval) ? (off_y * width + off_x) : maxidx;
                    maxval = fmax(val, maxval);
                }
            }
        }
    }
    top_data[index] = maxval;
#ifdef HAVE_MASK
    mask[index] = maxidx;
#endif
}

#elif defined KERNEL_AVE_POOL

// Average pooling, forward pass. Each work-item whose global id is below
// `nthreads` produces one output element.
//
//   nthreads       number of output elements to compute
//   bottom_data    input; read as consecutive planes of height * width elements
//   channels       not referenced by this kernel
//   height, width  size of one input plane
//   pooled_height, pooled_width
//                  size of one output plane
//   top_data       output; element `index` receives sum / pool_size
//
// Only in-plane elements are summed. The divisor depends on
// AVE_POOL_PADDING_AREA: when defined it is the window area measured before
// clamping to the plane (so padded cells are counted), otherwise it is the
// number of in-plane cells.
__kernel void TEMPLATE(ave_pool_forward, Dtype)(
    const int nthreads, __global const Dtype* bottom_data,
    const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width,
    __global Dtype* top_data)
{
    int index = get_global_id(0);
    if (index >= nthreads)
        return;

    // Decompose the flat output index into (plane, row, column).
    const int pw = index % pooled_width;
    const int xx = index / pooled_width;
    const int ph = xx % pooled_height;
    const int ch = xx / pooled_height;

    // Window extent in padded coordinates: it may start before 0 and is
    // limited to the bottom/right edge of the padded plane.
    int hstart = ph * STRIDE_H - PAD_T;
    int wstart = pw * STRIDE_W - PAD_L;
    int hend = min(hstart + KERNEL_H, height + PAD_B);
    int wend = min(wstart + KERNEL_W, width + PAD_R);
    int pool_size;
#ifdef AVE_POOL_PADDING_AREA
    // Divisor taken before clamping: padded cells count towards the area.
    pool_size = (hend - hstart) * (wend - wstart);
    hstart = max(hstart, (int)0);
    wstart = max(wstart, (int)0);
    hend = min(hend, height);
    wend = min(wend, width);
#else
    // Divisor taken after clamping: only in-plane cells count.
    hstart = max(hstart, (int)0);
    wstart = max(wstart, (int)0);
    hend = min(hend, height);
    wend = min(wend, width);
    pool_size = (hend - hstart) * (wend - wstart);
#endif
    Dtype aveval = 0;
    int in_offset = ch * height * width;
    for (int h = hstart; h < hend; ++h)
    {
        for (int w = wstart; w < wend; ++w)
        {
            aveval += bottom_data[in_offset + h * width + w];
        }
    }
    top_data[index] = aveval / pool_size;
}

#elif defined KERNEL_STO_POOL

// Forward pass of the "sto_pool ... test" kernel: for every output element it
// writes sum(v * v) / (FLT_MIN + sum(v)) over the pooling window, i.e. the
// window values averaged with weights proportional to the values themselves.
//
//   nthreads       number of output elements to compute
//   bottom_data    input; slice (n * channels + c) holds one height * width plane
//   channels       number of planes per outermost index n
//   height, width  size of one input plane
//   pooled_height, pooled_width
//                  size of one output plane
//   top_data       output; element `index` receives the weighted average
//
// Unlike the other two kernels, each work-item loops over
// index, index + get_global_size(0), ... until nthreads is reached, and the
// window is not offset by any PAD_* value.
__kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
    const int nthreads, __global const Dtype* bottom_data,
    const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width,
    __global Dtype* top_data)
{
    for (int index = get_global_id(0); index < nthreads;
         index += get_global_size(0))
    {
        // Decompose the flat output index into (n, c, ph, pw).
        const int pw = index % pooled_width;
        const int ph = (index / pooled_width) % pooled_height;
        const int c = (index / pooled_width / pooled_height) % channels;
        const int n = index / pooled_width / pooled_height / channels;

        // Window extent, clipped to the bottom/right edge of the plane.
        const int hstart = ph * STRIDE_H;
        const int hend = min(hstart + KERNEL_H, height);
        const int wstart = pw * STRIDE_W;
        const int wend = min(wstart + KERNEL_W, width);

        // cumsum starts at FLT_MIN rather than 0 so that the final division
        // is not a division by zero when every value in the window is 0.
        // NOTE(review): if Dtype is narrower than float (e.g. half), FLT_MIN
        // may not survive the conversion - confirm for fp16 builds.
        Dtype cumsum = FLT_MIN;
        // Integer literal (as in the average-pooling kernel) instead of the
        // double-precision literal `0.`, so no fp64 constant appears here.
        Dtype cumvalues = 0;
        __global const Dtype* bottom_slice = bottom_data
            + (n * channels + c) * height * width;

        // Single pass: accumulate the sum and the sum of squares.
        for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
                Dtype v = bottom_slice[h * width + w];
                cumsum += v;
                cumvalues += v * v;
            }
        }
        top_data[index] = cumvalues / cumsum;
    }
}

#endif // KERNEL_*
