Path: blob/master/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
16347 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// Intel License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2000, Intel Corporation, all rights reserved.13// Third party copyrights are property of their respective icvers.14//15// Redistribution and use in source and binary forms, with or without modification,16// are permitted provided that the following conditions are met:17//18// * Redistribution's of source code must retain the above copyright notice,19// this list of conditions and the following disclaimer.20//21// * Redistribution's in binary form must reproduce the above copyright notice,22// this list of conditions and the following disclaimer in the documentation23// and/or other materials provided with the distribution.24//25// * The name of Intel Corporation may not be used to endorse or promote products26// derived from this software without specific prior written permission.27//28// This software is provided by the copyright holders and contributors "as is" and29// any express or implied warranties, including, but not limited to, the implied30// warranties of merchantability and fitness for a particular purpose are disclaimed.31// In no event shall the Intel Corporation or contributors be liable for any direct,32// indirect, incidental, special, exemplary, or consequential damages33// (including, but not limited to, procurement of substitute goods or services;34// loss of use, data, or profits; or business interruption) however caused35// and on any theory of liability, whether in contract, strict liability,36// or tort (including negligence or otherwise) arising in any way out of37// the use of this software, even if advised of the possibility of such damage.38//39//M*/4041#ifndef __OPENCV_FAST_NLMEANS_DENOISING_INVOKER_HPP__42#define __OPENCV_FAST_NLMEANS_DENOISING_INVOKER_HPP__4344#include "precomp.hpp"45#include <limits>4647#include "fast_nlmeans_denoising_invoker_commons.hpp"48#include "arrays.hpp"4950using namespace cv;5152template <typename T, typename IT, typename UIT, typename D, typename WT>53struct FastNlMeansDenoisingInvoker :54public ParallelLoopBody55{56public:57FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,58int template_window_size, int search_window_size, const float *h);5960void operator() (const Range& range) const CV_OVERRIDE;6162private:63void operator= (const FastNlMeansDenoisingInvoker&);6465const Mat& src_;66Mat& dst_;6768Mat extended_src_;69int border_size_;7071int template_window_size_;72int search_window_size_;7374int template_window_half_size_;75int search_window_half_size_;7677typename pixelInfo<WT>::sampleType fixed_point_mult_;78int almost_template_window_size_sq_bin_shift_;79std::vector<WT> almost_dist2weight_;8081void calcDistSumsForFirstElementInRow(82int i, Array2d<int>& dist_sums,83Array3d<int>& col_dist_sums,84Array3d<int>& up_col_dist_sums) const;8586void calcDistSumsForElementInFirstRow(87int i, int j, int first_col_num,88Array2d<int>& dist_sums,89Array3d<int>& col_dist_sums,90Array3d<int>& up_col_dist_sums) const;91};9293inline int getNearestPowerOf2(int value)94{95int p = 0;96while( 1 << p < value)97++p;98return p;99}100101template <typename T, typename IT, typename UIT, typename D, typename WT>102FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(103const Mat& src, Mat& dst,104int template_window_size,105int search_window_size,106const float *h) :107src_(src), dst_(dst)108{109CV_Assert(src.channels() == pixelInfo<T>::channels);110111template_window_half_size_ = template_window_size / 2;112search_window_half_size_ = search_window_size / 2;113template_window_size_ = template_window_half_size_ * 2 + 1;114search_window_size_ = search_window_half_size_ * 2 + 1;115116border_size_ = search_window_half_size_ + template_window_half_size_;117copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);118119const IT max_estimate_sum_value =120(IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();121fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,122pixelInfo<WT>::sampleMax());123124// precalc weight for every possible l2 dist between blocks125// additional optimization of precalced weights to replace division(averaging) by binary shift126CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)127int template_window_size_sq = template_window_size_ * template_window_size_;128almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);129double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;130131int max_dist = D::template maxDist<T>();132int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);133almost_dist2weight_.resize(almost_max_dist);134135for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)136{137double dist = almost_dist * almost_dist2actual_dist_multiplier;138almost_dist2weight_[almost_dist] =139D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);140}141142// additional optimization init end143if (dst_.empty())144dst_ = Mat::zeros(src_.size(), src_.type());145}146147template <typename T, typename IT, typename UIT, typename D, typename WT>148void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const149{150int row_from = range.start;151int row_to = range.end - 1;152153// sums of cols anf rows for current pixel p154Array2d<int> dist_sums(search_window_size_, search_window_size_);155156// for lazy calc optimization (sum of cols for current pixel)157Array3d<int> col_dist_sums(template_window_size_, search_window_size_, search_window_size_);158159int first_col_num = -1;160// last elements of column sum (for each element in row)161Array3d<int> up_col_dist_sums(src_.cols, search_window_size_, search_window_size_);162163for (int i = row_from; i <= row_to; i++)164{165for (int j = 0; j < src_.cols; j++)166{167int search_window_y = i - search_window_half_size_;168int search_window_x = j - search_window_half_size_;169170// calc dist_sums171if (j == 0)172{173calcDistSumsForFirstElementInRow(i, dist_sums, col_dist_sums, up_col_dist_sums);174first_col_num = 0;175}176else177{178// calc cur dist_sums using previous dist_sums179if (i == row_from)180{181calcDistSumsForElementInFirstRow(i, j, first_col_num,182dist_sums, col_dist_sums, up_col_dist_sums);183}184else185{186int ay = border_size_ + i;187int ax = border_size_ + j + template_window_half_size_;188189int start_by = border_size_ + i - search_window_half_size_;190int start_bx = border_size_ + j - search_window_half_size_ + template_window_half_size_;191192T a_up = extended_src_.at<T>(ay - template_window_half_size_ - 1, ax);193T a_down = extended_src_.at<T>(ay + template_window_half_size_, ax);194195// copy class member to local variable for optimization196int search_window_size = search_window_size_;197198for (int y = 0; y < search_window_size; y++)199{200int * dist_sums_row = dist_sums.row_ptr(y);201int * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y);202int * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y);203204const T * b_up_ptr = extended_src_.ptr<T>(start_by - template_window_half_size_ - 1 + y);205const T * b_down_ptr = extended_src_.ptr<T>(start_by + template_window_half_size_ + y);206207// MSVC 2015 generates unaligned destination for "movaps" instruction for 32-bit builds208#if defined _MSC_VER && _MSC_VER == 1900 && !defined _WIN64209#pragma loop(no_vector)210#endif211for (int x = 0; x < search_window_size; x++)212{213// remove from current pixel sum column sum with index "first_col_num"214dist_sums_row[x] -= col_dist_sums_row[x];215216int bx = start_bx + x;217col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);218219dist_sums_row[x] += col_dist_sums_row[x];220up_col_dist_sums_row[x] = col_dist_sums_row[x];221}222}223}224225first_col_num = (first_col_num + 1) % template_window_size_;226}227228// calc weights229IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];230for (int channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)231estimation[channel_num] = 0;232for (int channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)233weights_sum[channel_num] = 0;234235for (int y = 0; y < search_window_size_; y++)236{237const T* cur_row_ptr = extended_src_.ptr<T>(border_size_ + search_window_y + y);238int* dist_sums_row = dist_sums.row_ptr(y);239for (int x = 0; x < search_window_size_; x++)240{241int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;242WT weight = almost_dist2weight_[almostAvgDist];243T p = cur_row_ptr[border_size_ + search_window_x + x];244incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);245}246}247248divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,249weights_sum);250dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);251}252}253}254255template <typename T, typename IT, typename UIT, typename D, typename WT>256inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(257int i,258Array2d<int>& dist_sums,259Array3d<int>& col_dist_sums,260Array3d<int>& up_col_dist_sums) const261{262int j = 0;263264for (int y = 0; y < search_window_size_; y++)265for (int x = 0; x < search_window_size_; x++)266{267dist_sums[y][x] = 0;268for (int tx = 0; tx < template_window_size_; tx++)269col_dist_sums[tx][y][x] = 0;270271int start_y = i + y - search_window_half_size_;272int start_x = j + x - search_window_half_size_;273274for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)275for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)276{277int dist = D::template calcDist<T>(extended_src_,278border_size_ + i + ty, border_size_ + j + tx,279border_size_ + start_y + ty, border_size_ + start_x + tx);280281dist_sums[y][x] += dist;282col_dist_sums[tx + template_window_half_size_][y][x] += dist;283}284285up_col_dist_sums[j][y][x] = col_dist_sums[template_window_size_ - 1][y][x];286}287}288289template <typename T, typename IT, typename UIT, typename D, typename WT>290inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(291int i, int j, int first_col_num,292Array2d<int>& dist_sums,293Array3d<int>& col_dist_sums,294Array3d<int>& up_col_dist_sums) const295{296int ay = border_size_ + i;297int ax = border_size_ + j + template_window_half_size_;298299int start_by = border_size_ + i - search_window_half_size_;300int start_bx = border_size_ + j - search_window_half_size_ + template_window_half_size_;301302int new_last_col_num = first_col_num;303304for (int y = 0; y < search_window_size_; y++)305for (int x = 0; x < search_window_size_; x++)306{307dist_sums[y][x] -= col_dist_sums[first_col_num][y][x];308309col_dist_sums[new_last_col_num][y][x] = 0;310int by = start_by + y;311int bx = start_bx + x;312for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)313col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);314315dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];316up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];317}318}319320#endif321322323