Path: blob/master/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
16347 views
// This file is part of OpenCV project.1// It is subject to the license terms in the LICENSE file found in the top-level directory2// of this distribution and at http://opencv.org/license.html.34// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.5// Third party copyrights are property of their respective owners.67#include "precomp.hpp"8#ifndef __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__9#define __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__1011#include "opencl_kernels_photo.hpp"1213#ifdef HAVE_OPENCL1415namespace cv {1617enum18{19BLOCK_ROWS = 32,20BLOCK_COLS = 32,21CTA_SIZE_INTEL = 64,22CTA_SIZE_DEFAULT = 25623};2425template <typename FT, typename ST, typename WT>26static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,27int searchWindowSize, int templateWindowSize,28const FT *h, int hn, int cn, int normType,29int & almostTemplateWindowSizeSqBinShift)30{31const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *32std::numeric_limits<ST>::max();33int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,34std::numeric_limits<int>::max());35int depth = DataType<FT>::depth;36bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;3738if (depth == CV_64F && !doubleSupport)39return false;4041// precalc weight for every possible l2 dist between blocks42// additional optimization of precalced weights to replace division(averaging) by binary shift43CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)44int templateWindowSizeSq = templateWindowSize * templateWindowSize;45almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);46FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;4748const FT WEIGHT_THRESHOLD = 1e-3f;49WT maxDist = normType == NORM_L1 ? (WT)std::numeric_limits<ST>::max() * cn :50(WT)std::numeric_limits<ST>::max() * (WT)std::numeric_limits<ST>::max() * cn;51int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);52FT den[4];53CV_Assert(hn > 0 && hn <= 4);54for (int i=0; i<hn; i++)55den[i] = 1.0f / (h[i] * h[i] * cn);5657almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));5859char buf[40];60ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,61format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"62" -D wlut_t=%s -D convert_wlut_t=%s%s%s",63ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),64ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),65doubleSupport ? " -D DOUBLE_SUPPORT" : "",66normType == NORM_L1 ? " -D ABS" : ""));67if (k.empty())68return false;6970k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,71almostDist2ActualDistMultiplier, fixedPointMult,72ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);7374size_t globalsize[1] = { (size_t)almostMaxDist };75return k.run(1, globalsize, NULL, false);76}7778static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,79int templateWindowSize, int searchWindowSize, int normType)80{81int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);82int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;83Size size = _src.size();8485if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&86(normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))87return false;8889int templateWindowHalfWize = templateWindowSize / 2;90int searchWindowHalfSize = searchWindowSize / 2;91templateWindowSize = templateWindowHalfWize * 2 + 1;92searchWindowSize = searchWindowHalfSize * 2 + 1;93int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);94int almostTemplateWindowSizeSqBinShift = -1;9596char buf[4][40];97const unsigned psz = (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn);98String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"99" -D pixel_t=%s -D int_t=%s -D wlut_t=%s"100" -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"101" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"102" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"103" -D convert_int_t=%s -D cn=%d -D psz=%u -D convert_pixel_t=%s%s",104templateWindowSize, searchWindowSize,105ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),106ocl::typeToStr(CV_32SC(hn)),107depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :108format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),109depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :110format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),111depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :112format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),113depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :114format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),115BLOCK_COLS, BLOCK_ROWS,116ctaSize, templateWindowHalfWize, searchWindowHalfSize,117ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,118psz,119ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),120normType == NORM_L1 ? " -D ABS" : "");121122ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);123if (k.empty())124return false;125126UMat almostDist2Weight;127if ((depth == CV_8U &&128!ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,129searchWindowSize, templateWindowSize,130h, hn, cn, normType,131almostTemplateWindowSizeSqBinShift)) ||132(depth == CV_16U &&133!ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,134searchWindowSize, templateWindowSize,135h, hn, cn, normType,136almostTemplateWindowSizeSqBinShift)))137return false;138CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);139140UMat srcex;141int borderSize = searchWindowHalfSize + templateWindowHalfWize;142if (cn == 3) {143srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));144UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height));145int from_to[] = { 0,0, 1,1, 2,2 };146mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);147copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,148BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place149}150else151copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);152153_dst.create(size, type);154UMat dst;155if (cn == 3)156dst.create(size, CV_MAKE_TYPE(depth, 4));157else158dst = _dst.getUMat();159160int searchWindowSizeSq = searchWindowSize * searchWindowSize;161Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);162Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy);163UMat buffer(upColSumSize + colSumSize, CV_32SC(cn));164165srcex = srcex(Rect(Point(borderSize, borderSize), size));166k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst),167ocl::KernelArg::PtrReadOnly(almostDist2Weight),168ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);169170size_t globalsize[2] = { (size_t)nblocksx * ctaSize, (size_t)nblocksy }, localsize[2] = { (size_t)ctaSize, 1 };171if (!k.run(2, globalsize, localsize, false)) return false;172173if (cn == 3) {174int from_to[] = { 0,0, 1,1, 2,2 };175mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);176}177178return true;179}180181static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,182float h, float hForColorComponents,183int templateWindowSize, int searchWindowSize)184{185UMat src = _src.getUMat();186_dst.create(src.size(), src.type());187UMat dst = _dst.getUMat();188189UMat src_lab;190cvtColor(src, src_lab, COLOR_LBGR2Lab);191192UMat l(src.size(), CV_8U);193UMat ab(src.size(), CV_8UC2);194std::vector<UMat> l_ab(2), l_ab_denoised(2);195l_ab[0] = l;196l_ab[1] = ab;197l_ab_denoised[0].create(src.size(), CV_8U);198l_ab_denoised[1].create(src.size(), CV_8UC2);199200int from_to[] = { 0,0, 1,1, 2,2 };201mixChannels(std::vector<UMat>(1, src_lab), l_ab, from_to, 3);202203fastNlMeansDenoising(l_ab[0], l_ab_denoised[0], h, templateWindowSize, searchWindowSize);204fastNlMeansDenoising(l_ab[1], l_ab_denoised[1], hForColorComponents, templateWindowSize, searchWindowSize);205206UMat dst_lab(src.size(), CV_8UC3);207mixChannels(l_ab_denoised, std::vector<UMat>(1, dst_lab), from_to, 3);208209cvtColor(dst_lab, dst, COLOR_Lab2LBGR, src.channels());210return true;211}212213}214215#endif216#endif217218219