Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
16347 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
4
5
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
6
// Third party copyrights are property of their respective owners.
7
8
#include "precomp.hpp"
9
#ifndef __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
10
#define __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
11
12
#include "opencl_kernels_photo.hpp"
13
14
#ifdef HAVE_OPENCL
15
16
namespace cv {
17
18
// Work-partitioning constants shared by the OpenCL non-local-means host code.
enum
{
    // Work-group size along dimension 0 of the denoising kernel launch:
    // CTA_SIZE_INTEL is picked when the default device reports isIntel(),
    // CTA_SIZE_DEFAULT otherwise.
    CTA_SIZE_DEFAULT = 256,
    CTA_SIZE_INTEL = 64,

    // Tile extent (in pixels) used to split the image into a grid of blocks;
    // also forwarded to the kernel build as -D BLOCK_COLS / -D BLOCK_ROWS.
    BLOCK_COLS = 32,
    BLOCK_ROWS = 32
};
25
26
template <typename FT, typename ST, typename WT>
27
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
28
int searchWindowSize, int templateWindowSize,
29
const FT *h, int hn, int cn, int normType,
30
int & almostTemplateWindowSizeSqBinShift)
31
{
32
const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
33
std::numeric_limits<ST>::max();
34
int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
35
std::numeric_limits<int>::max());
36
int depth = DataType<FT>::depth;
37
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
38
39
if (depth == CV_64F && !doubleSupport)
40
return false;
41
42
// precalc weight for every possible l2 dist between blocks
43
// additional optimization of precalced weights to replace division(averaging) by binary shift
44
CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
45
int templateWindowSizeSq = templateWindowSize * templateWindowSize;
46
almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
47
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
48
49
const FT WEIGHT_THRESHOLD = 1e-3f;
50
WT maxDist = normType == NORM_L1 ? (WT)std::numeric_limits<ST>::max() * cn :
51
(WT)std::numeric_limits<ST>::max() * (WT)std::numeric_limits<ST>::max() * cn;
52
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
53
FT den[4];
54
CV_Assert(hn > 0 && hn <= 4);
55
for (int i=0; i<hn; i++)
56
den[i] = 1.0f / (h[i] * h[i] * cn);
57
58
almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
59
60
char buf[40];
61
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
62
format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
63
" -D wlut_t=%s -D convert_wlut_t=%s%s%s",
64
ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
65
ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
66
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
67
normType == NORM_L1 ? " -D ABS" : ""));
68
if (k.empty())
69
return false;
70
71
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
72
almostDist2ActualDistMultiplier, fixedPointMult,
73
ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
74
75
size_t globalsize[1] = { (size_t)almostMaxDist };
76
return k.run(1, globalsize, NULL, false);
77
}
78
79
// OpenCL path of fast non-local-means denoising.
//
// Supported inputs: 1..4 channels, and either (NORM_L2 with CV_8U) or
// (NORM_L1 with CV_8U / CV_16U). Anything else returns false without touching
// _dst, as does a kernel build failure, a weight-table failure or a failed
// kernel launch.
//
//   _src / _dst        - input image and output image (created with the input's
//                        size and type).
//   h, hn              - filter strengths and their count.
//   templateWindowSize - template window side; rounded to an odd value here.
//   searchWindowSize   - search window side; rounded to an odd value here.
//   normType           - NORM_L1 or NORM_L2.
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
                                     int templateWindowSize, int searchWindowSize, int normType)
{
    const int type = _src.type();
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    const int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
    const Size size = _src.size();

    const bool l2Supported = normType == NORM_L2 && depth == CV_8U;
    const bool l1Supported = normType == NORM_L1 && (depth == CV_8U || depth == CV_16U);
    if (cn < 1 || cn > 4 || !(l2Supported || l1Supported))
        return false;

    // Force both windows to odd sizes (2 * radius + 1).
    const int templateRadius = templateWindowSize / 2;
    const int searchRadius = searchWindowSize / 2;
    templateWindowSize = 2 * templateRadius + 1;
    searchWindowSize = 2 * searchRadius + 1;

    const int nblocksx = divUp(size.width, BLOCK_COLS);
    const int nblocksy = divUp(size.height, BLOCK_ROWS);
    int almostTemplateWindowSizeSqBinShift = -1;

    // Pixel size in bytes; 3-channel data is stored padded to 4 channels.
    const unsigned psz = (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn);

    // weight_t / sum_t and their converters are 32-bit ints for 8-bit images
    // and "long" vectors otherwise.
    char buf[4][40];
    String longWeightT, longWeightCvt, longSumT, longSumCvt;
    const char *weightT, *weightCvt, *sumT, *sumCvt;
    if (depth == CV_8U)
    {
        weightT = ocl::typeToStr(CV_32SC(hn));
        weightCvt = ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]);
        sumT = ocl::typeToStr(CV_32SC(cn));
        sumCvt = ocl::convertTypeStr(depth, CV_32S, cn, buf[1]);
    }
    else
    {
        longWeightT = format("long%s", hn > 1 ? format("%d", hn).c_str() : "");
        longWeightCvt = format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "");
        longSumT = format("long%s", cn > 1 ? format("%d", cn).c_str() : "");
        longSumCvt = format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "");
        weightT = longWeightT.c_str();
        weightCvt = longWeightCvt.c_str();
        sumT = longSumT.c_str();
        sumCvt = longSumCvt.c_str();
    }
    const char * const pixelT = ocl::typeToStr(type);
    const char * const intT = ocl::typeToStr(CV_32SC(cn));
    const char * const wlutT = ocl::typeToStr(CV_32SC(hn));
    const char * const intCvt = ocl::convertTypeStr(depth, CV_32S, cn, buf[2]);
    const char * const pixelCvt = ocl::convertTypeStr(CV_32S, depth, cn, buf[3]);
    const char * const absFlag = normType == NORM_L1 ? " -D ABS" : "";

    String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
                         " -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
                         " -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
                         " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
                         " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
                         " -D convert_int_t=%s -D cn=%d -D psz=%u -D convert_pixel_t=%s%s",
                         templateWindowSize, searchWindowSize,
                         pixelT, intT, wlutT,
                         weightT, weightCvt, sumT, sumCvt,
                         BLOCK_COLS, BLOCK_ROWS,
                         ctaSize, templateRadius, searchRadius,
                         intCvt, cn, psz, pixelCvt, absFlag);

    ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
    if (k.empty())
        return false;

    // Build the distance -> weight lookup table for the matching sample type.
    UMat almostDist2Weight;
    bool weightsReady = true;
    if (depth == CV_8U)
        weightsReady = ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
                                                                    searchWindowSize, templateWindowSize,
                                                                    h, hn, cn, normType,
                                                                    almostTemplateWindowSizeSqBinShift);
    else if (depth == CV_16U)
        weightsReady = ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
                                                                       searchWindowSize, templateWindowSize,
                                                                       h, hn, cn, normType,
                                                                       almostTemplateWindowSizeSqBinShift);
    if (!weightsReady)
        return false;
    CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);

    // Source extended by a border wide enough for search + template radius.
    UMat srcex;
    const int borderSize = searchRadius + templateRadius;
    if (cn != 3)
    {
        copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
    }
    else
    {
        // 3-channel input is repacked into a 4-channel image first.
        srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
        UMat interior(srcex, Rect(borderSize, borderSize, size.width, size.height));
        int from_to[] = { 0,0, 1,1, 2,2 };
        mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, interior), from_to, 3);
        copyMakeBorder(interior, srcex, borderSize, borderSize, borderSize, borderSize,
                       BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
    }

    _dst.create(size, type);
    UMat dst;
    if (cn != 3)
        dst = _dst.getUMat();
    else
        dst.create(size, CV_MAKE_TYPE(depth, 4)); // padded result, unpacked after the run

    // Auxiliary 32-bit buffer handed to the kernel
    // (presumably scratch space for column sums - confirm against the kernel source).
    const int searchWindowSizeSq = searchWindowSize * searchWindowSize;
    const Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
    const Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy);
    UMat buffer(upColSumSize + colSumSize, CV_32SC(cn));

    // Point the kernel at the un-bordered interior of the extended source.
    srcex = srcex(Rect(Point(borderSize, borderSize), size));
    k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst),
           ocl::KernelArg::PtrReadOnly(almostDist2Weight),
           ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);

    size_t globalsize[2] = { (size_t)nblocksx * ctaSize, (size_t)nblocksy };
    size_t localsize[2] = { (size_t)ctaSize, 1 };
    if (!k.run(2, globalsize, localsize, false))
        return false;

    if (cn == 3)
    {
        // Drop the padding channel while copying into the caller's output.
        int from_to[] = { 0,0, 1,1, 2,2 };
        mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
    }

    return true;
}
181
182
// OpenCL path of colored fast non-local-means denoising.
//
// The image is converted to Lab, split into an L plane and a two-channel ab
// plane, each part is denoised with fastNlMeansDenoising (h for L,
// hForColorComponents for ab), and the result is converted back into _dst
// with the same number of channels as the source.
//
// Always returns true; the nested denoising calls are not checked here.
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                      float h, float hForColorComponents,
                                      int templateWindowSize, int searchWindowSize)
{
    UMat src = _src.getUMat();
    _dst.create(src.size(), src.type());
    UMat dst = _dst.getUMat();

    UMat labImage;
    cvtColor(src, labImage, COLOR_LBGR2Lab);

    // Plane 0 holds L, plane 1 holds the interleaved a/b channels.
    std::vector<UMat> noisyPlanes(2);
    noisyPlanes[0] = UMat(src.size(), CV_8U);
    noisyPlanes[1] = UMat(src.size(), CV_8UC2);

    std::vector<UMat> cleanPlanes(2);
    cleanPlanes[0].create(src.size(), CV_8U);
    cleanPlanes[1].create(src.size(), CV_8UC2);

    // Identity channel mapping, used both for splitting and for merging.
    int from_to[] = { 0,0, 1,1, 2,2 };
    mixChannels(std::vector<UMat>(1, labImage), noisyPlanes, from_to, 3);

    fastNlMeansDenoising(noisyPlanes[0], cleanPlanes[0], h, templateWindowSize, searchWindowSize);
    fastNlMeansDenoising(noisyPlanes[1], cleanPlanes[1], hForColorComponents, templateWindowSize, searchWindowSize);

    UMat labDenoised(src.size(), CV_8UC3);
    mixChannels(cleanPlanes, std::vector<UMat>(1, labDenoised), from_to, 3);

    cvtColor(labDenoised, dst, COLOR_Lab2LBGR, src.channels());
    return true;
}
213
214
}
215
216
#endif
217
#endif
218
219