Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/imgproc/src/demosaicing.cpp
16354 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15
// Copyright (C) 2014, Itseez Inc., all rights reserved.
16
// Third party copyrights are property of their respective owners.
17
//
18
// Redistribution and use in source and binary forms, with or without modification,
19
// are permitted provided that the following conditions are met:
20
//
21
// * Redistribution's of source code must retain the above copyright notice,
22
// this list of conditions and the following disclaimer.
23
//
24
// * Redistribution's in binary form must reproduce the above copyright notice,
25
// this list of conditions and the following disclaimer in the documentation
26
// and/or other materials provided with the distribution.
27
//
28
// * The name of the copyright holders may not be used to endorse or promote products
29
// derived from this software without specific prior written permission.
30
//
31
// This software is provided by the copyright holders and contributors "as is" and
32
// any express or implied warranties, including, but not limited to, the implied
33
// warranties of merchantability and fitness for a particular purpose are disclaimed.
34
// In no event shall the Intel Corporation or contributors be liable for any direct,
35
// indirect, incidental, special, exemplary, or consequential damages
36
// (including, but not limited to, procurement of substitute goods or services;
37
// loss of use, data, or profits; or business interruption) however caused
38
// and on any theory of liability, whether in contract, strict liability,
39
// or tort (including negligence or otherwise) arising in any way out of
40
// the use of this software, even if advised of the possibility of such damage.
41
//
42
//M*/
43
44
/********************************* COPYRIGHT NOTICE *******************************\
45
Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
46
from MD-Mathematische Dienste GmbH. Below is the copyright notice:
47
48
IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
49
By downloading, copying, installing or using the software you agree
50
to this license. If you do not agree to this license, do not download,
51
install, copy or use the software.
52
53
Contributors License Agreement:
54
55
Copyright (c) 2002,
56
MD-Mathematische Dienste GmbH
57
Im Defdahl 5-10
58
44141 Dortmund
59
Germany
60
www.md-it.de
61
62
Redistribution and use in source and binary forms,
63
with or without modification, are permitted provided
64
that the following conditions are met:
65
66
Redistributions of source code must retain
67
the above copyright notice, this list of conditions and the following disclaimer.
68
Redistributions in binary form must reproduce the above copyright notice,
69
this list of conditions and the following disclaimer in the documentation
70
and/or other materials provided with the distribution.
71
The name of Contributor may not be used to endorse or promote products
72
derived from this software without specific prior written permission.
73
74
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
75
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
76
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
77
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
78
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
79
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
80
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
81
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
82
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
83
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
84
THE POSSIBILITY OF SUCH DAMAGE.
85
\**********************************************************************************/
86
87
88
#include "precomp.hpp"
89
90
#include <limits>
91
92
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
93
94
namespace cv
95
{
96
97
98
//////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////
99
100
// No-op SIMD interpolator used when no vector instruction set is available.
// Each method reports that 0 pixels were processed, so the caller's scalar
// fallback loop handles the entire row. The interface mirrors the real
// SIMD interpolators (SSE2/NEON specializations of SIMDBayerInterpolator_8u).
template<typename T>
class SIMDBayerStubInterpolator_
{
public:
    // Bayer -> grayscale: never vectorizes, caller does all the work.
    int bayer2Gray(const T* /*bayer*/, int /*bayer_step*/, T* /*dst*/,
                   int /*width*/, int /*bcoeff*/, int /*gcoeff*/, int /*rcoeff*/) const
    {
        return 0;
    }

    // Bayer -> 3-channel RGB/BGR: never vectorizes.
    int bayer2RGB(const T* /*bayer*/, int /*bayer_step*/, T* /*dst*/,
                  int /*width*/, int /*blue*/) const
    {
        return 0;
    }

    // Bayer -> 4-channel RGBA/BGRA: never vectorizes.
    int bayer2RGBA(const T* /*bayer*/, int /*bayer_step*/, T* /*dst*/,
                   int /*width*/, int /*blue*/) const
    {
        return 0;
    }

    // Edge-aware Bayer -> RGB: never vectorizes.
    int bayer2RGB_EA(const T* /*bayer*/, int /*bayer_step*/, T* /*dst*/,
                     int /*width*/, int /*blue*/) const
    {
        return 0;
    }
};
124
125
#if CV_SSE2
126
// SSE2 vectorized Bayer interpolator for 8-bit images.
// Each vectorized method consumes 14 source pixels per iteration (reading 16
// with unaligned loads, hence the `bayer_end - 18` loop bound) across three
// consecutive Bayer rows, and returns the number of pixels it processed so the
// scalar caller can finish the remainder of the row.
class SIMDBayerInterpolator_8u
{
public:
    SIMDBayerInterpolator_8u()
    {
        // Runtime dispatch: all methods degrade to "0 pixels processed"
        // when SSE2 is unavailable.
        use_simd = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Bayer -> gray for one output row.
    // NOTE(review): _b2y is loaded from rcoeff and _r2y from bcoeff — the
    // names look swapped, but the caller (Bayer2Gray_Invoker) swaps the
    // coefficients on alternate rows; verify against the caller before
    // "fixing" this.
    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                   int width, int bcoeff, int gcoeff, int rcoeff) const
    {
        if( !use_simd )
            return 0;

        __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
        __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
        __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
        {
            // Three consecutive Bayer rows.
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            // Isolate even-positioned bytes of rows 0 and 2 (slli/srli by 8
            // masks to the low byte; the extra >>7 pre-scales by 2 for the
            // fixed-point multiply below).
            __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
                                       _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
            b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);

            // Green: vertical neighbours from rows 0/2 plus horizontal
            // neighbours from the middle row.
            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
            __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
            g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);

            // Red: odd bytes of the middle row; r0/r1 are reused as scratch.
            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
            r0 = _mm_slli_epi16(r0, 3);

            // Fixed-point weighted sum via mulhi (coefficients were
            // pre-doubled above to compensate for the implicit >>16).
            g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
            g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
            g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
            g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
            g0 = _mm_srli_epi16(g0, 2);
            g1 = _mm_srli_epi16(g1, 2);
            // Saturate to 8 bit and interleave even/odd results back into
            // pixel order.
            g0 = _mm_packus_epi16(g0, g0);
            g1 = _mm_packus_epi16(g1, g1);
            g0 = _mm_unpacklo_epi8(g0, g1);
            _mm_storeu_si128((__m128i*)dst, g0);
        }

        // Number of pixels actually consumed.
        return (int)(bayer - (bayer_end - width));
    }

    // Bayer -> 3-channel BGR/RGB (bilinear) for one output row.
    // `blue` is +/-1 and selects whether blue or red lands in channel 0;
    // the xor-with-mask trick below swaps the two planes when blue < 0.
    // Note the stores target dst-1: the first byte written belongs to the
    // previous pixel and is overwritten by the next group / border fill.
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        if( !use_simd )
            return 0;
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */

        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
        __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
        __m128i masklo = _mm_set1_epi16(0x00ff);
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            // Blue: average of 2 vertical neighbours (odd positions) or
            // 4 diagonal neighbours (even positions), with rounding.
            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
            __m128i nextb1 = _mm_srli_si128(b1, 2);
            __m128i b0 = _mm_add_epi16(b1, nextb1);
            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
            // b0 b2 ... b14 b1 b3 ... b15
            b0 = _mm_packus_epi16(b0, b1);

            // Green: 4-neighbour average at red sites, pass-through at
            // green sites.
            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
            __m128i g1 = _mm_and_si128(r1, masklo);
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
            g1 = _mm_srli_si128(g1, 2);
            g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
            // g0 g2 ... g14 g1 g3 ... g15
            g0 = _mm_packus_epi16(g0, g1);

            // Red: pass-through at red sites, horizontal average elsewhere.
            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
            // r0 r2 ... r14 r1 r3 ... r15
            r0 = _mm_packus_epi16(r0, r1);

            // Branch-free swap of the blue and red planes when blue < 0.
            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
            b0 = _mm_xor_si128(b0, b1);
            r0 = _mm_xor_si128(r0, b1);

            // Interleave planes into BGR triplets (shuffle network below).
            // b1 g1 b3 g3 b5 g5...
            b1 = _mm_unpackhi_epi8(b0, g0);
            // b0 g0 b2 g2 b4 g4 ....
            b0 = _mm_unpacklo_epi8(b0, g0);

            // r1 0 r3 0 r5 0 ...
            r1 = _mm_unpackhi_epi8(r0, z);
            // r0 0 r2 0 r4 0 ...
            r0 = _mm_unpacklo_epi8(r0, z);

            // 0 b0 g0 r0 0 b2 g2 r2 ...
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
            // 0 b8 g8 r8 0 b10 g10 r10 ...
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);

            // b1 g1 r1 0 b3 g3 r3 0 ...
            r0 = _mm_unpacklo_epi16(b1, r1);
            // b9 g9 r9 0 b11 g11 r11 0 ...
            r1 = _mm_unpackhi_epi16(b1, r1);

            // 0 b0 g0 r0 b1 g1 r1 0 ...
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
            // 0 b4 g4 r4 b5 g5 r5 0 ...
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);

            _mm_storel_epi64((__m128i*)(dst-1+0), b0);
            _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
            _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
            _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));

            // 0 b8 g8 r8 b9 g9 r9 0 ...
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
            // 0 b12 g12 r12 b13 g13 r13 0 ...
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);

            _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
            _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));

            _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // 4-channel output has no SSE2 path; caller falls back to scalar code.
    int bayer2RGBA(const uchar*, int, uchar*, int, int) const
    {
        return 0;
    }

    // Edge-aware Bayer -> BGR: like bayer2RGB, but at red/blue sites green
    // is taken from the direction (horizontal vs vertical) with the smaller
    // gradient instead of a plain 4-neighbour average.
    // NOTE(review): here the plane-swap mask uses `blue > 0`, the opposite
    // of bayer2RGB's `blue < 0`, and stores start at dst rather than dst-1 —
    // the caller is expected to account for this convention difference.
    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        if (!use_simd)
            return 0;

        const uchar* bayer_end = bayer + width;
        __m128i masklow = _mm_set1_epi16(0x00ff);
        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
        __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
        __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);

        for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
        {
            /*
             B G B G | B G B G | B G B G | B G B G
             G R G R | G R G R | G R G R | G R G R
             B G B G | B G B G | B G B G | B G B G
             */

            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
            __m128i nextb1 = _mm_srli_si128(b1, 2);
            __m128i b0 = _mm_add_epi16(b1, nextb1);
            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
            // b0 b2 ... b14 b1 b3 ... b15
            b0 = _mm_packus_epi16(b0, b1);

            // vertical sum
            __m128i r0g = _mm_srli_epi16(r0, 8);
            __m128i r2g = _mm_srli_epi16(r2, 8);
            __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
            // horizontal sum
            __m128i g1 = _mm_and_si128(masklow, r1);
            __m128i nextg1 = _mm_srli_si128(g1, 2);
            __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);

            // gradients: |a-b| via saturating subtract in both directions.
            __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
            __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
            __m128i gmask = _mm_cmpgt_epi16(gradg, gradv);

            // Select vertical average where the horizontal gradient is
            // larger, otherwise the horizontal average.
            __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
            // g0 g2 ... g14 g1 g3 ...
            g0 = _mm_packus_epi16(g0, nextg1);

            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
            // r0 r2 ... r14 r1 r3 ... r15
            r0 = _mm_packus_epi16(r0, r1);

            // Branch-free blue/red plane swap (see mask above).
            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
            b0 = _mm_xor_si128(b0, b1);
            r0 = _mm_xor_si128(r0, b1);

            // Interleave planes into BGR triplets (same network as
            // bayer2RGB above).
            // b1 g1 b3 g3 b5 g5...
            b1 = _mm_unpackhi_epi8(b0, g0);
            // b0 g0 b2 g2 b4 g4 ....
            b0 = _mm_unpacklo_epi8(b0, g0);

            // r1 0 r3 0 r5 0 ...
            r1 = _mm_unpackhi_epi8(r0, z);
            // r0 0 r2 0 r4 0 ...
            r0 = _mm_unpacklo_epi8(r0, z);

            // 0 b0 g0 r0 0 b2 g2 r2 ...
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
            // 0 b8 g8 r8 0 b10 g10 r10 ...
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);

            // b1 g1 r1 0 b3 g3 r3 0 ...
            r0 = _mm_unpacklo_epi16(b1, r1);
            // b9 g9 r9 0 b11 g11 r11 0 ...
            r1 = _mm_unpackhi_epi16(b1, r1);

            // 0 b0 g0 r0 b1 g1 r1 0 ...
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
            // 0 b4 g4 r4 b5 g5 r5 0 ...
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);

            _mm_storel_epi64((__m128i*)(dst+0), b0);
            _mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));
            _mm_storel_epi64((__m128i*)(dst+6*2), b1);
            _mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));

            // 0 b8 g8 r8 b9 g9 r9 0 ...
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
            // 0 b12 g12 r12 b13 g13 r13 0 ...
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);

            _mm_storel_epi64((__m128i*)(dst+6*4), g0);
            _mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));

            _mm_storel_epi64((__m128i*)(dst+6*6), g1);
        }

        return int(bayer - (bayer_end - width));
    }

    // True when SSE2 was detected at construction time.
    bool use_simd;
};
381
#elif CV_NEON
382
// NEON vectorized Bayer interpolator for 8-bit images.
// Mirrors the SSE2 implementation: each vectorized method consumes 14 source
// pixels per iteration (loading 16, hence the `bayer_end - 18` loop bound)
// across three Bayer rows and returns the number of pixels processed.
// Unlike the SSE2 class there is no runtime dispatch flag: if this code is
// compiled (CV_NEON), the instructions are assumed available.
class SIMDBayerInterpolator_8u
{
public:
    SIMDBayerInterpolator_8u()
    {
    }

    // Bayer -> gray for one output row.
    // NOTE(review): the b* terms are scaled by rcoeff and the r* terms by
    // bcoeff — mirrors the caller's alternate-row coefficient swap, same as
    // the SSE2 path; verify against Bayer2Gray_Invoker before changing.
    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                   int width, int bcoeff, int gcoeff, int rcoeff) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */

        uint16x8_t masklo = vdupq_n_u16(255);
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
        {
            // 16 bytes of each of the three rows, viewed as 8 x u16 lanes
            // (low byte = even column, high byte = odd column).
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1_ = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t b1 = vextq_u16(b1_, b1_, 1);
            uint16x8_t b0 = vaddq_u16(b1_, b1);
            // b0 = sums for even output columns
            // b1 = sums for odd output columns

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            uint16x8_t rot = vextq_u16(g1, g1, 1);
            g1 = vshlq_n_u16(rot, 2);
            // g0 = green sums for even output columns
            // g1 = green (pre-scaled x4) for odd output columns

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            r0 = vshlq_n_u16(r0, 2);
            // r0 = red (pre-scaled x4) for even output columns
            // r1 = red sums for odd output columns

            // Fixed-point weighted sum via saturating doubling multiply-high;
            // coefficients are pre-doubled/quadrupled to balance the
            // different pre-scaling of the even/odd accumulators.
            b0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b0), (short)(rcoeff*2)));
            b1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b1), (short)(rcoeff*4)));

            g0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g0), (short)(gcoeff*2)));
            g1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g1), (short)(gcoeff*2)));

            r0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r0), (short)(bcoeff*2)));
            r1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r1), (short)(bcoeff*4)));

            g0 = vaddq_u16(vaddq_u16(g0, b0), r0);
            g1 = vaddq_u16(vaddq_u16(g1, b1), r1);

            // Narrow with rounding and re-interleave even/odd results
            // back into pixel order.
            uint8x8x2_t p = vzip_u8(vrshrn_n_u16(g0, 2), vrshrn_n_u16(g1, 2));
            vst1_u8(dst, p.val[0]);
            vst1_u8(dst + 8, p.val[1]);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // Bayer -> 3-channel BGR/RGB (bilinear) for one output row.
    // `blue` is +/-1; it indexes pix.val[1-blue] / pix.val[1+blue] to place
    // the interpolated blue/red planes in the right output channels.
    // vst3q_u8 interleaves the three planes; the store starts at dst-1
    // (the first byte belongs to the previous pixel and is overwritten
    // later by the border fill).
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */
        uint16x8_t masklo = vdupq_n_u16(255);
        uint8x16x3_t pix;
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            // Blue plane: 4-neighbour average at even columns, 2-neighbour
            // (vertical) average at odd columns, rounded.
            uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
            uint16x8_t b0 = vaddq_u16(b1, nextb1);
            // b0 b1 b2 ...
            uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
            pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);

            // Green plane: 4-neighbour average at red sites, pass-through
            // at green sites.
            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            g1 = vextq_u16(g1, g1, 1);
            // g0 g1 g2 ...
            uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
            pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);

            // Red plane: pass-through at red sites, horizontal average
            // elsewhere.
            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            // r0 r1 r2 ...
            uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
            pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);

            vst3q_u8(dst-1, pix);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // Bayer -> 4-channel BGRA/RGBA: same interpolation as bayer2RGB with a
    // constant opaque (255) alpha plane, stored interleaved via vst4q_u8.
    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */
        uint16x8_t masklo = vdupq_n_u16(255);
        uint8x16x4_t pix;
        const uchar* bayer_end = bayer + width;
        pix.val[3] = vdupq_n_u8(255);

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
            uint16x8_t b0 = vaddq_u16(b1, nextb1);
            // b0 b1 b2 ...
            uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
            pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            g1 = vextq_u16(g1, g1, 1);
            // g0 g1 g2 ...
            uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
            pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            // r0 r1 r2 ...
            uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
            pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);

            // NOTE(review): vst4q_u8 at dst-1 writes 64 bytes starting one
            // byte before dst — the caller must guarantee that byte is
            // writable (same convention as vst3q_u8 in bayer2RGB).
            vst4q_u8(dst-1, pix);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // No NEON edge-aware path; caller falls back to scalar code.
    int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
    {
        return 0;
    }
};
541
#else
// Neither SSE2 nor NEON: use the do-nothing stub so callers always have an
// interpolator type and fall back to their scalar loops.
typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;
#endif
544
545
546
// Parallel row-range worker converting a Bayer mosaic to grayscale.
// Each output row i is computed from source rows i..i+2; the destination is
// offset by one row and one column (dst0 += dst_step + 1), so the caller
// (Bayer2Gray_) fills the outer one-pixel border afterwards.
// Row parity matters: on odd rows the blue/red coefficients swap and the
// "starts with green" flag flips, which is why both are re-derived from
// range.start below — every Range must therefore start on a consistent
// row parity (ranges created by parallel_for_ satisfy this since they
// partition [0, height)).
template<typename T, class SIMDInterpolator>
class Bayer2Gray_Invoker :
    public ParallelLoopBody
{
public:
    Bayer2Gray_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, bool _brow,
                       const Size& _size, int _bcoeff, int _rcoeff) :
        ParallelLoopBody(), srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green),
        Brow(_brow), size(_size), Bcoeff(_bcoeff), Rcoeff(_rcoeff)
    {
    }

    virtual void operator ()(const Range& range) const CV_OVERRIDE
    {
        SIMDInterpolator vecOp;
        // ITU-R BT.601-style fixed-point luma weights, scaled by 2^SHIFT.
        // G2Y is constant; the B/R weights arrive via the constructor and
        // swap per row.
        const int G2Y = 9617;
        const int SHIFT = 14;

        const T* bayer0 = srcmat.ptr<T>();
        int bayer_step = (int)(srcmat.step/sizeof(T));
        T* dst0 = (T*)dstmat.data;
        int dst_step = (int)(dstmat.step/sizeof(T));
        int bcoeff = Bcoeff, rcoeff = Rcoeff;
        int start_with_green = Start_with_green;
        bool brow = Brow;

        // Skip the top border row and left border column.
        dst0 += dst_step + 1;

        // Re-align the per-row alternating state to this range's parity.
        if (range.start % 2)
        {
            brow = !brow;
            std::swap(bcoeff, rcoeff);
            start_with_green = !start_with_green;
        }

        bayer0 += range.start * bayer_step;
        dst0 += range.start * dst_step;

        for(int i = range.start ; i < range.end; ++i, bayer0 += bayer_step, dst0 += dst_step )
        {
            unsigned t0, t1, t2;
            const T* bayer = bayer0;
            T* dst = dst0;
            const T* bayer_end = bayer + size.width;

            // Degenerate interior (source narrower than 3 columns):
            // just zero the two border pixels of this row.
            if( size.width <= 0 )
            {
                dst[-1] = dst[size.width] = 0;
                continue;
            }

            // A leading green pixel is handled separately so the main loop
            // always starts on a non-green column.
            if( start_with_green )
            {
                t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;
                t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;
                t2 = bayer[bayer_step+1]*(2*G2Y);

                // Only 2 samples per colour here, hence SHIFT+1.
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
                bayer++;
                dst++;
            }

            // Vectorized bulk of the row; delta = pixels already done.
            int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);
            bayer += delta;
            dst += delta;

            // Scalar tail: two output pixels per iteration (one at a
            // blue/red site — 4 samples each, SHIFT+2 — and one at a green
            // site — 2 samples each, SHIFT+1).
            for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
                t2 = bayer[bayer_step+1]*(4*bcoeff);
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);

                t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;
                t2 = bayer[bayer_step+2]*(2*G2Y);
                dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
            }

            // Possible single trailing pixel.
            if( bayer < bayer_end )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
                t2 = bayer[bayer_step+1]*(4*bcoeff);
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
                bayer++;
                dst++;
            }

            // Replicate the first/last interior pixels into the left/right
            // one-pixel border of this row.
            dst0[-1] = dst0[0];
            dst0[size.width] = dst0[size.width-1];

            // Advance the alternating per-row state.
            brow = !brow;
            std::swap(bcoeff, rcoeff);
            start_with_green = !start_with_green;
        }
    }

private:
    Mat srcmat;            // Bayer source (headers copied; data shared)
    Mat dstmat;            // grayscale destination
    int Start_with_green;  // first interior row starts on a green column
    bool Brow;             // first interior row contains blue pixels
    Size size;             // interior size (source size minus 2 in each dim)
    int Bcoeff, Rcoeff;    // fixed-point blue/red luma weights for row 0
};
652
653
template<typename T, typename SIMDInterpolator>
654
static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )
655
{
656
const int R2Y = 4899;
657
const int B2Y = 1868;
658
659
Size size = srcmat.size();
660
int bcoeff = B2Y, rcoeff = R2Y;
661
int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;
662
bool brow = true;
663
664
if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )
665
{
666
brow = false;
667
std::swap(bcoeff, rcoeff);
668
}
669
size.height -= 2;
670
size.width -= 2;
671
672
if (size.height > 0)
673
{
674
Range range(0, size.height);
675
Bayer2Gray_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat,
676
start_with_green, brow, size, bcoeff, rcoeff);
677
parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
678
}
679
680
size = dstmat.size();
681
T* dst0 = dstmat.ptr<T>();
682
int dst_step = (int)(dstmat.step/sizeof(T));
683
if( size.height > 2 )
684
for( int i = 0; i < size.width; i++ )
685
{
686
dst0[i] = dst0[i + dst_step];
687
dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
688
}
689
else
690
for( int i = 0; i < size.width; i++ )
691
dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
692
}
693
694
// Alpha<T>::value() yields the "fully opaque" alpha for channel type T,
// used when expanding Bayer data to a 4-channel image.
template <typename T>
struct Alpha
{
    // Integer channel types saturate at the maximum representable value.
    static T value() { return std::numeric_limits<T>::max(); }
};

// Floating-point images use the [0, 1] range, so opaque is 1.0 rather than
// FLT_MAX.
template <>
struct Alpha<float>
{
    static float value() { return 1.0f; }
};
705
706
// Parallel row-range worker converting a Bayer mosaic to 3- or 4-channel
// BGR/BGRA (bilinear interpolation). Each output row is computed from three
// consecutive source rows; dst0 points one row and one pixel into the
// destination, and the caller (Bayer2RGB_) fills the remaining border rows.
// `Blue` is +/-1 (which of channel 0/2 is blue on even rows) and
// `Start_with_green` says whether the first interior row begins on a green
// column; both alternate per row, so they are re-derived from range.start.
template <typename T, typename SIMDInterpolator>
class Bayer2RGB_Invoker :
    public ParallelLoopBody
{
public:
    Bayer2RGB_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, int _blue, const Size& _size) :
        ParallelLoopBody(),
        srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), Blue(_blue), size(_size)
    {
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        SIMDInterpolator vecOp;
        T alpha = Alpha<T>::value();
        int dcn = dstmat.channels();   // 3 (BGR) or 4 (BGRA)
        int dcn2 = dcn << 1;           // two output pixels per loop step

        int bayer_step = (int)(srcmat.step/sizeof(T));
        const T* bayer0 = srcmat.ptr<T>() + bayer_step * range.start;

        // Skip the top border row and the left border pixel (+1 centers the
        // write window so dst[-blue]/dst[blue] address channels 0 and 2).
        int dst_step = (int)(dstmat.step/sizeof(T));
        T* dst0 = reinterpret_cast<T*>(dstmat.data) + (range.start + 1) * dst_step + dcn + 1;

        // Re-align the alternating per-row state to this range's parity.
        int blue = Blue, start_with_green = Start_with_green;
        if (range.start % 2)
        {
            blue = -blue;
            start_with_green = !start_with_green;
        }

        for (int i = range.start; i < range.end; bayer0 += bayer_step, dst0 += dst_step, ++i )
        {
            int t0, t1;
            const T* bayer = bayer0;
            T* dst = dst0;
            const T* bayer_end = bayer + size.width;

            // in case of when size.width <= 2: no interior pixels; zero the
            // border channels (and set opaque alpha for 4-channel output).
            if( size.width <= 0 )
            {
                if (dcn == 3)
                {
                    dst[-4] = dst[-3] = dst[-2] = dst[size.width*dcn-1] =
                        dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
                }
                else
                {
                    dst[-5] = dst[-4] = dst[-3] = dst[size.width*dcn-1] =
                        dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
                    dst[-2] = dst[size.width*dcn+2] = alpha;
                }
                continue;
            }

            // A leading green pixel is handled separately so the main loops
            // always start on a blue/red column.
            if( start_with_green )
            {
                t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;   // vertical red/blue avg
                t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1; // horizontal avg

                dst[-blue] = (T)t0;
                dst[0] = bayer[bayer_step+1];   // green is measured directly
                dst[blue] = (T)t1;
                if (dcn == 4)
                    dst[2] = alpha; // alpha channel

                bayer++;
                dst += dcn;
            }

            // simd optimization only for dcn == 3
            int delta = dcn == 4 ?
                vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
                vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
            bayer += delta;
            dst += delta*dcn;

            // Scalar tail: two output pixels per iteration — one at a
            // blue/red site (4-neighbour averages) and one at a green site
            // (2-neighbour averages). The blue>0 / blue<0 variants only
            // differ in which side of dst[0] each colour lands on.
            if (dcn == 3) // Bayer to BGR
            {
                if( blue > 0 )
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[1] = bayer[bayer_step+1];

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[2] = (T)t0;
                        dst[3] = bayer[bayer_step+2];
                        dst[4] = (T)t1;
                    }
                }
                else
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[-1] = bayer[bayer_step+1];

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[4] = (T)t0;
                        dst[3] = bayer[bayer_step+2];
                        dst[2] = (T)t1;
                    }
                }
            }
            else // Bayer to BGRA
            {
                // if current row does not contain Blue pixels
                if( blue > 0 )
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[1] = bayer[bayer_step+1];
                        dst[2] = alpha; // alpha channel

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[3] = (T)t0;
                        dst[4] = bayer[bayer_step+2];
                        dst[5] = (T)t1;
                        dst[6] = alpha; // alpha channel
                    }
                }
                else // if current row contains Blue pixels
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = bayer[bayer_step+1];
                        dst[0] = (T)t1;
                        dst[1] = (T)t0;
                        dst[2] = alpha; // alpha channel

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[3] = (T)t1;
                        dst[4] = bayer[bayer_step+2];
                        dst[5] = (T)t0;
                        dst[6] = alpha; // alpha channel
                    }
                }
            }

            // if skip one pixel at the end of row
            if( bayer < bayer_end )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                      bayer[bayer_step*2+2] + 2) >> 2;
                t1 = (bayer[1] + bayer[bayer_step] +
                      bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                dst[-blue] = (T)t0;
                dst[0] = (T)t1;
                dst[blue] = bayer[bayer_step+1];
                if (dcn == 4)
                    dst[2] = alpha; // alpha channel
                bayer++;
                dst += dcn;
            }

            // fill the last and the first pixels of row accordingly
            // (replicate the adjacent interior pixels into the 1-pixel
            // left/right borders)
            if (dcn == 3)
            {
                dst0[-4] = dst0[-1];
                dst0[-3] = dst0[0];
                dst0[-2] = dst0[1];
                dst0[size.width*dcn-1] = dst0[size.width*dcn-4];
                dst0[size.width*dcn] = dst0[size.width*dcn-3];
                dst0[size.width*dcn+1] = dst0[size.width*dcn-2];
            }
            else
            {
                dst0[-5] = dst0[-1];
                dst0[-4] = dst0[0];
                dst0[-3] = dst0[1];
                dst0[-2] = dst0[2]; // alpha channel
                dst0[size.width*dcn-1] = dst0[size.width*dcn-5];
                dst0[size.width*dcn] = dst0[size.width*dcn-4];
                dst0[size.width*dcn+1] = dst0[size.width*dcn-3];
                dst0[size.width*dcn+2] = dst0[size.width*dcn-2]; // alpha channel
            }

            // Advance the alternating per-row state.
            blue = -blue;
            start_with_green = !start_with_green;
        }
    }

private:
    Mat srcmat;                  // Bayer source (headers copied; data shared)
    Mat dstmat;                  // BGR/BGRA destination
    int Start_with_green, Blue;  // layout state of the first interior row
    Size size;                   // interior size (source minus 2 each dim)
};
920
921
template<typename T, class SIMDInterpolator>
922
static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )
923
{
924
int dst_step = (int)(dstmat.step/sizeof(T));
925
Size size = srcmat.size();
926
int blue = (code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ||
927
code == CV_BayerBG2BGRA || code == CV_BayerGB2BGRA ) ? -1 : 1;
928
int start_with_green = (code == CV_BayerGB2BGR || code == CV_BayerGR2BGR ||
929
code == CV_BayerGB2BGRA || code == CV_BayerGR2BGRA);
930
931
int dcn = dstmat.channels();
932
size.height -= 2;
933
size.width -= 2;
934
935
if (size.height > 0)
936
{
937
Range range(0, size.height);
938
Bayer2RGB_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, start_with_green, blue, size);
939
parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
940
}
941
942
// filling the first and the last rows
943
size = dstmat.size();
944
T* dst0 = dstmat.ptr<T>();
945
if( size.height > 2 )
946
for( int i = 0; i < size.width*dcn; i++ )
947
{
948
dst0[i] = dst0[i + dst_step];
949
dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
950
}
951
else
952
for( int i = 0; i < size.width*dcn; i++ )
953
dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
954
}
955
956
957
/////////////////// Demosaicing using Variable Number of Gradients ///////////////////////
958
959
// VNG (Variable Number of Gradients) demosaicing, 8-bit input only.
// For each interior pixel, eight directional gradients (N, S, E, W and the
// four diagonals) are computed; directions whose gradient falls below a
// per-pixel threshold contribute to the color estimate, and the result is
// normalized by the number of contributing directions.
// The scalar and SSE2 paths are intentionally kept bit-identical; any change
// to one must be mirrored in the other.
static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
{
    const uchar* bayer = srcmat.ptr();
    int bstep = (int)srcmat.step;       // source row stride in bytes
    uchar* dst = dstmat.ptr();
    int dststep = (int)dstmat.step;     // destination row stride in bytes
    Size size = srcmat.size();

    // blueIdx: output channel index holding blue for this pattern phase.
    int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2;
    bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG;

    // for too small images use the simple interpolation algorithm
    if( MIN(size.width, size.height) < 8 )
    {
        Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code );
        return;
    }

    // Ring buffer of `brows` rows; each row holds `bcn` planes of width N:
    // plane 0: vertical gradient, 1: horizontal, 2: NE-SW diff, 3: NW-SE diff,
    // 4: NE gradient, 5: NW gradient, 6: green cross-sum (x2).
    const int brows = 3, bcn = 7;
    int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
    int i, bufstep = N7*bcn;  // NOTE(review): N7 (= N*bcn) already spans all planes; N7*bcn over-allocates ~7x — harmless but worth confirming against upstream.
    cv::AutoBuffer<ushort> _buf(bufstep*brows);
    ushort* buf = _buf.data();

    bayer += bstep*2;  // processing starts from the third source row

#if CV_SSE2
    bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2);
    // saturating |a-b| for unsigned 16-bit lanes
    #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a))
#endif

    for( int y = 2; y < size.height - 4; y++ )
    {
        uchar* dstrow = dst + dststep*y + 6;
        const uchar* srow;

        // (Re)compute the gradient rows needed for this y.  On the first
        // iteration all three buffer rows are filled (dy = -1..1); afterwards
        // only the newly exposed row (dy = 1) is computed.
        for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )
        {
            ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
            srow = bayer + (y+dy)*bstep + 1;

            // zero the sentinel columns at both ends of every plane
            for( i = 0; i < bcn; i++ )
                brow[N*i-1] = brow[(N-2) + N*i] = 0;

            i = 1;

#if CV_SSE2
            if( haveSSE )
            {
                __m128i z = _mm_setzero_si128();
                // 8 pixels per iteration; s1..s9 are the 3x3 neighbourhood
                // (s5 = center, unused) widened to 16 bits.
                for( ; i <= N-9; i += 8, srow += 8, brow += 8 )
                {
                    __m128i s1, s2, s3, s4, s6, s7, s8, s9;

                    s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z);
                    s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z);
                    s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z);

                    s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z);
                    s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z);

                    s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z);
                    s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z);
                    s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z);

                    __m128i b0, b1, b2, b3, b4, b5, b6;

                    // b0: vertical gradient, b1: horizontal gradient
                    b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1),
                                        _mm_adds_epu16(_mm_absdiff_epu16(s1, s7),
                                                       _mm_absdiff_epu16(s3, s9)));
                    b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1),
                                        _mm_adds_epu16(_mm_absdiff_epu16(s1, s3),
                                                       _mm_absdiff_epu16(s7, s9)));
                    // b2/b3: doubled diagonal differences
                    b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1);
                    b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1);

                    _mm_storeu_si128((__m128i*)brow, b0);
                    _mm_storeu_si128((__m128i*)(brow + N), b1);
                    _mm_storeu_si128((__m128i*)(brow + N2), b2);
                    _mm_storeu_si128((__m128i*)(brow + N3), b3);

                    // b4/b5: full NE and NW gradients; b6: green cross-sum>>1
                    b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4),
                                                          _mm_absdiff_epu16(s6, s8)));
                    b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6),
                                                          _mm_absdiff_epu16(s4, s8)));
                    b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8));
                    b6 = _mm_srli_epi16(b6, 1);

                    _mm_storeu_si128((__m128i*)(brow + N4), b4);
                    _mm_storeu_si128((__m128i*)(brow + N5), b5);
                    _mm_storeu_si128((__m128i*)(brow + N6), b6);
                }
            }
#endif

            // scalar tail: same quantities as the SSE loop above
            for( ; i < N-1; i++, srow++, brow++ )
            {
                brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
                                   std::abs(srow[-bstep] - srow[+bstep])*2 +
                                   std::abs(srow[1-bstep] - srow[1+bstep]));
                brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
                                   std::abs(srow[-1] - srow[1])*2 +
                                   std::abs(srow[-1+bstep] - srow[1+bstep]));
                brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
                brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
                brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
                                    std::abs(srow[+bstep] - srow[1]));
                brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
                                    std::abs(srow[+bstep] - srow[-1]));
                brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
            }
        }

        // Three gradient rows centered on y (previous, current, next).
        const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
        const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
        const ushort* brow2 = buf + (y % brows)*bufstep + 2;
        // scale[ng] = 1/(2*ng): normalization by the number of directions used
        static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };
        srow = bayer + y*bstep + 2;
        bool greenCell = greenCell0;

        i = 2;
#if CV_SSE2
        // With SSE, do 1-2 scalar pixels first so the vector loop starts on a
        // non-green cell (it processes green/non-green pairs in lock-step).
        int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2;
#else
        int limit = N - 2;
#endif

        do
        {
            for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 )
            {
                int gradN = brow0[0] + brow1[0];
                int gradS = brow1[0] + brow2[0];
                int gradW = brow1[N-1] + brow1[N];
                int gradE = brow1[N] + brow1[N+1];
                int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
                int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
                int R, G, B;

                if( !greenCell )
                {
                    // red or blue site; "R" below is whichever of R/B lives
                    // here, "B" the opposite one — swapped via blueIdx at store.
                    int gradNE = brow0[N4+1] + brow1[N4];
                    int gradSW = brow1[N4] + brow2[N4-1];
                    int gradNW = brow0[N5-1] + brow1[N5];
                    int gradSE = brow1[N5] + brow2[N5+1];

                    minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
                    maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
                    int T = minGrad + MAX(maxGrad/2, 1);   // direction-acceptance threshold

                    int Rs = 0, Gs = 0, Bs = 0, ng = 0;
                    if( gradN < T )
                    {
                        Rs += srow[-bstep*2] + srow[0];
                        Gs += srow[-bstep]*2;
                        Bs += srow[-bstep-1] + srow[-bstep+1];
                        ng++;
                    }
                    if( gradS < T )
                    {
                        Rs += srow[bstep*2] + srow[0];
                        Gs += srow[bstep]*2;
                        Bs += srow[bstep-1] + srow[bstep+1];
                        ng++;
                    }
                    if( gradW < T )
                    {
                        Rs += srow[-2] + srow[0];
                        Gs += srow[-1]*2;
                        Bs += srow[-bstep-1] + srow[bstep-1];
                        ng++;
                    }
                    if( gradE < T )
                    {
                        Rs += srow[2] + srow[0];
                        Gs += srow[1]*2;
                        Bs += srow[-bstep+1] + srow[bstep+1];
                        ng++;
                    }
                    if( gradNE < T )
                    {
                        Rs += srow[-bstep*2+2] + srow[0];
                        Gs += brow0[N6+1];
                        Bs += srow[-bstep+1]*2;
                        ng++;
                    }
                    if( gradSW < T )
                    {
                        Rs += srow[bstep*2-2] + srow[0];
                        Gs += brow2[N6-1];
                        Bs += srow[bstep-1]*2;
                        ng++;
                    }
                    if( gradNW < T )
                    {
                        Rs += srow[-bstep*2-2] + srow[0];
                        Gs += brow0[N6-1];
                        Bs += srow[-bstep+1]*2;  // NOTE(review): NE neighbour for a NW direction (expected srow[-bstep-1]) — mirrored by x5 in the SSE path, so kept for bit-exactness; confirm against upstream before changing.
                        ng++;
                    }
                    if( gradSE < T )
                    {
                        Rs += srow[bstep*2+2] + srow[0];
                        Gs += brow2[N6+1];
                        Bs += srow[-bstep+1]*2;  // NOTE(review): same asymmetry for SE (expected srow[bstep+1]) — mirrored in the SSE path; confirm against upstream.
                        ng++;
                    }
                    R = srow[0];
                    G = R + cvRound((Gs - Rs)*scale[ng]);
                    B = R + cvRound((Bs - Rs)*scale[ng]);
                }
                else
                {
                    // green site: diagonal gradients are built from the two
                    // doubled diagonal-difference planes (N2/N3).
                    int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
                    int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
                    int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
                    int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];

                    minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
                    maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
                    int T = minGrad + MAX(maxGrad/2, 1);

                    int Rs = 0, Gs = 0, Bs = 0, ng = 0;
                    if( gradN < T )
                    {
                        Rs += srow[-bstep*2-1] + srow[-bstep*2+1];
                        Gs += srow[-bstep*2] + srow[0];
                        Bs += srow[-bstep]*2;
                        ng++;
                    }
                    if( gradS < T )
                    {
                        Rs += srow[bstep*2-1] + srow[bstep*2+1];
                        Gs += srow[bstep*2] + srow[0];
                        Bs += srow[bstep]*2;
                        ng++;
                    }
                    if( gradW < T )
                    {
                        Rs += srow[-1]*2;
                        Gs += srow[-2] + srow[0];
                        Bs += srow[-bstep-2]+srow[bstep-2];
                        ng++;
                    }
                    if( gradE < T )
                    {
                        Rs += srow[1]*2;
                        Gs += srow[2] + srow[0];
                        Bs += srow[-bstep+2]+srow[bstep+2];
                        ng++;
                    }
                    if( gradNE < T )
                    {
                        Rs += srow[-bstep*2+1] + srow[1];
                        Gs += srow[-bstep+1]*2;
                        Bs += srow[-bstep] + srow[-bstep+2];
                        ng++;
                    }
                    if( gradSW < T )
                    {
                        Rs += srow[bstep*2-1] + srow[-1];
                        Gs += srow[bstep-1]*2;
                        Bs += srow[bstep] + srow[bstep-2];
                        ng++;
                    }
                    if( gradNW < T )
                    {
                        Rs += srow[-bstep*2-1] + srow[-1];
                        Gs += srow[-bstep-1]*2;
                        Bs += srow[-bstep-2]+srow[-bstep];
                        ng++;
                    }
                    if( gradSE < T )
                    {
                        Rs += srow[bstep*2+1] + srow[1];
                        Gs += srow[bstep+1]*2;
                        Bs += srow[bstep+2]+srow[bstep];
                        ng++;
                    }
                    G = srow[0];
                    R = G + cvRound((Rs - Gs)*scale[ng]);
                    B = G + cvRound((Bs - Gs)*scale[ng]);
                }
                dstrow[blueIdx] = cv::saturate_cast<uchar>(B);
                dstrow[1] = cv::saturate_cast<uchar>(G);
                dstrow[blueIdx^2] = cv::saturate_cast<uchar>(R);
                greenCell = !greenCell;
            }

#if CV_SSE2
            if( !haveSSE )
                break;

            __m128i emask    = _mm_set1_epi32(0x0000ffff),
                    omask    = _mm_set1_epi32(0xffff0000),
                    z        = _mm_setzero_si128(),
                    one      = _mm_set1_epi16(1);
            __m128 _0_5 = _mm_set1_ps(0.5f);

            // Even lanes hold non-green cells, odd lanes green cells;
            // _mm_merge_epi16 selects per-lane which formula applies.
            #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)
            #define _mm_cvtloepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f)
            #define _mm_cvthiepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f)
            #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts

            // process 8 pixels at once
            for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 )
            {
                //int gradN = brow0[0] + brow1[0];
                __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1));

                //int gradS = brow1[0] + brow2[0];
                __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2));

                //int gradW = brow1[N-1] + brow1[N];
                __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N)));

                //int gradE = brow1[N+1] + brow1[N];
                __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N)));

                //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
                //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
                __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE));
                __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE));

                __m128i grad0, grad1;

                // grad0 = non-green-cell formula, grad1 = green-cell formula;
                // merged per-lane below.
                //int gradNE = brow0[N4+1] + brow1[N4];
                //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
                grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))),
                                        _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1))));
                __m128i gradNE = _mm_merge_epi16(grad0, grad1);

                //int gradSW = brow1[N4] + brow2[N4-1];
                //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1))));
                __m128i gradSW = _mm_merge_epi16(grad0, grad1);

                minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW);
                maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW);

                //int gradNW = brow0[N5-1] + brow1[N5];
                //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1))));
                __m128i gradNW = _mm_merge_epi16(grad0, grad1);

                //int gradSE = brow1[N5] + brow2[N5+1];
                //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1))));
                __m128i gradSE = _mm_merge_epi16(grad0, grad1);

                minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE);
                maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE);

                //int T = minGrad + maxGrad/2;
                __m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad);

                __m128i RGs = z, GRs = z, Bs = z, ng = z;

                // 5x5 neighbourhood widened to 16-bit lanes
                __m128i x0 = _mm_loadl_u8_s16(srow, +0          );
                __m128i x1 = _mm_loadl_u8_s16(srow, -1 - bstep  );
                __m128i x2 = _mm_loadl_u8_s16(srow, -1 - bstep*2);
                __m128i x3 = _mm_loadl_u8_s16(srow,    - bstep  );
                __m128i x4 = _mm_loadl_u8_s16(srow, +1 - bstep*2);
                __m128i x5 = _mm_loadl_u8_s16(srow, +1 - bstep  );
                __m128i x6 = _mm_loadl_u8_s16(srow, +2 - bstep  );
                __m128i x7 = _mm_loadl_u8_s16(srow, +1          );
                __m128i x8 = _mm_loadl_u8_s16(srow, +2 + bstep  );
                __m128i x9 = _mm_loadl_u8_s16(srow, +1 + bstep  );
                __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2);
                __m128i x11 = _mm_loadl_u8_s16(srow,    + bstep  );
                __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2);
                __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep  );
                __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep  );
                __m128i x15 = _mm_loadl_u8_s16(srow, -1          );
                __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep  );

                __m128i t0, t1, mask;

                // gradN ***********************************************
                mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN
                ng = _mm_sub_epi16(ng, mask);     // ng += (T>gradN)

                t0 = _mm_slli_epi16(x3, 1);                                 // srow[-bstep]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0);  // srow[-bstep*2] + srow[0]

                // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask));
                // Bs  += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask));

                // gradNE **********************************************
                mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradNE)

                t0 = _mm_slli_epi16(x5, 1);                                   // srow[-bstep+1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0);  // srow[-bstep*2+2] + srow[0]

                // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask));
                // Bs  += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE)
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask));

                // gradE ***********************************************
                mask = _mm_cmpgt_epi16(T, gradE); // mask = T>gradE
                ng = _mm_sub_epi16(ng, mask);     // ng += (T>gradE)

                t0 = _mm_slli_epi16(x7, 1);                          // srow[1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0);  // srow[2] + srow[0]

                // RGs += (srow[2] + srow[0]) * (T>gradE)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += (srow[1]*2) * (T>gradE)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
                // Bs  += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask));

                // gradSE **********************************************
                mask = _mm_cmpgt_epi16(T, gradSE); // mask = T>gradSE
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradSE)

                t0 = _mm_slli_epi16(x9, 1);                                  // srow[bstep+1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0);  // srow[bstep*2+2] + srow[0]

                // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask));
                // Bs  += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)
                // NOTE(review): x5 = srow[-bstep+1] here matches the scalar path's gradSE asymmetry.
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask));

                // gradS ***********************************************
                mask = _mm_cmpgt_epi16(T, gradS); // mask = T>gradS
                ng = _mm_sub_epi16(ng, mask);     // ng += (T>gradS)

                t0 = _mm_slli_epi16(x11, 1);                              // srow[bstep]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0);  // srow[bstep*2]+srow[0]

                // RGs += (srow[bstep*2]+srow[0]) * (T>gradS)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask));
                // Bs  += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask));

                // gradSW **********************************************
                mask = _mm_cmpgt_epi16(T, gradSW); // mask = T>gradSW
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradSW)

                t0 = _mm_slli_epi16(x13, 1);                                 // srow[bstep-1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0);  // srow[bstep*2-2]+srow[0]

                // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask));
                // Bs  += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask));

                // gradW ***********************************************
                mask = _mm_cmpgt_epi16(T, gradW); // mask = T>gradW
                ng = _mm_sub_epi16(ng, mask);     // ng += (T>gradW)

                t0 = _mm_slli_epi16(x15, 1);                          // srow[-1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0);  // srow[-2]+srow[0]

                // RGs += (srow[-2]+srow[0]) * (T>gradW)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += (srow[-1]*2) * (T>gradW)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
                // Bs  += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask));

                // gradNW **********************************************
                mask = _mm_cmpgt_epi16(T, gradNW); // mask = T>gradNW
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradNW)

                t0 = _mm_slli_epi16(x1, 1);                                  // srow[-bstep-1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0);  // srow[-bstep*2-2]+srow[0]

                // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask));
                // Bs  += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)
                // NOTE(review): x5 = srow[-bstep+1], not srow[-bstep-1] as the comment above says —
                // this matches the scalar path's gradNW asymmetry and is kept for bit-exactness.
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask));

                // per-lane normalization factors 0.5/ng (scalar path uses scale[ng])
                __m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng));
                __m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng));

                // now interpolate r, g & b
                t0 = _mm_subs_epi16(GRs, RGs);
                t1 = _mm_subs_epi16(Bs, RGs);

                t0 = _mm_add_epi16(x0, _mm_packs_epi32(
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)),
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1))));

                t1 = _mm_add_epi16(x0, _mm_packs_epi32(
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)),
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1))));

                // x1: raw value on non-green lanes, interpolated on green lanes (and vice versa for x2)
                x1 = _mm_merge_epi16(x0, t0);
                x2 = _mm_merge_epi16(t0, x0);

                uchar R[8], G[8], B[8];

                _mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z));
                _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z));
                _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z));

                for( int j = 0; j < 8; j++, dstrow += 3 )
                {
                    dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j];
                }
            }
#endif

            limit = N - 2;  // let the scalar loop finish the row tail
        }
        while( i < N - 2 );

        // replicate 2 pixels on both horizontal ends of the row
        for( i = 0; i < 6; i++ )
        {
            dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i];
            dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i];
        }

        greenCell0 = !greenCell0;
        blueIdx ^= 2;   // pattern phase flips every row
    }

    // fill the 2 top and 4 bottom unprocessed rows by replication
    for( i = 0; i < size.width*3; i++ )
    {
        dst[i] = dst[i + dststep] = dst[i + dststep*2];
        dst[i + dststep*(size.height-4)] =
        dst[i + dststep*(size.height-3)] =
        dst[i + dststep*(size.height-2)] =
        dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)];
    }
}
1510
1511
//////////////////////////////// Edge-Aware Demosaicing //////////////////////////////////
1512
1513
template <typename T, typename SIMDInterpolator>
1514
class Bayer2RGB_EdgeAware_T_Invoker :
1515
public cv::ParallelLoopBody
1516
{
1517
public:
1518
Bayer2RGB_EdgeAware_T_Invoker(const Mat& _src, Mat& _dst, const Size& _size,
1519
int _blue, int _start_with_green) :
1520
ParallelLoopBody(),
1521
src(_src), dst(_dst), size(_size), Blue(_blue), Start_with_green(_start_with_green)
1522
{
1523
}
1524
1525
virtual void operator()(const Range& range) const CV_OVERRIDE
1526
{
1527
int dcn = dst.channels();
1528
int dcn2 = dcn<<1;
1529
int start_with_green = Start_with_green, blue = Blue;
1530
int sstep = int(src.step / src.elemSize1()), dstep = int(dst.step / dst.elemSize1());
1531
SIMDInterpolator vecOp;
1532
1533
const T* S = src.ptr<T>(range.start + 1) + 1;
1534
T* D = reinterpret_cast<T*>(dst.data + (range.start + 1) * dst.step) + dcn;
1535
1536
if (range.start % 2)
1537
{
1538
start_with_green ^= 1;
1539
blue ^= 1;
1540
}
1541
1542
// to BGR
1543
for (int y = range.start; y < range.end; ++y)
1544
{
1545
int x = 1;
1546
if (start_with_green)
1547
{
1548
D[blue<<1] = (S[-sstep] + S[sstep]) >> 1;
1549
D[1] = S[0];
1550
D[2-(blue<<1)] = (S[-1] + S[1]) >> 1;
1551
D += dcn;
1552
++S;
1553
++x;
1554
}
1555
1556
int delta = vecOp.bayer2RGB_EA(S - sstep - 1, sstep, D, size.width, blue);
1557
x += delta;
1558
S += delta;
1559
D += dcn * delta;
1560
1561
if (blue)
1562
for (; x < size.width; x += 2, S += 2, D += dcn2)
1563
{
1564
D[0] = S[0];
1565
D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
1566
D[2] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1]) >> 2;
1567
1568
D[3] = (S[0] + S[2] + 1) >> 1;
1569
D[4] = S[1];
1570
D[5] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;
1571
}
1572
else
1573
for (; x < size.width; x += 2, S += 2, D += dcn2)
1574
{
1575
D[0] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;
1576
D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
1577
D[2] = S[0];
1578
1579
D[3] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;
1580
D[4] = S[1];
1581
D[5] = (S[0] + S[2] + 1) >> 1;
1582
}
1583
1584
if (x <= size.width)
1585
{
1586
D[blue<<1] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;
1587
D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
1588
D[2-(blue<<1)] = S[0];
1589
D += dcn;
1590
++S;
1591
}
1592
1593
for (int i = 0; i < dcn; ++i)
1594
{
1595
D[i] = D[-dcn + i];
1596
D[-dstep+dcn+i] = D[-dstep+(dcn<<1)+i];
1597
}
1598
1599
start_with_green ^= 1;
1600
blue ^= 1;
1601
S += 2;
1602
D += dcn2;
1603
}
1604
}
1605
1606
private:
1607
Mat src;
1608
Mat dst;
1609
Size size;
1610
int Blue, Start_with_green;
1611
};
1612
1613
template <typename T, typename SIMDInterpolator>
1614
static void Bayer2RGB_EdgeAware_T(const Mat& src, Mat& dst, int code)
1615
{
1616
Size size = src.size();
1617
1618
// for small sizes
1619
if (size.width <= 2 || size.height <= 2)
1620
{
1621
dst = Scalar::all(0);
1622
return;
1623
}
1624
1625
size.width -= 2;
1626
size.height -= 2;
1627
1628
int start_with_green = code == CV_BayerGB2BGR_EA || code == CV_BayerGR2BGR_EA ? 1 : 0;
1629
int blue = code == CV_BayerGB2BGR_EA || code == CV_BayerBG2BGR_EA ? 1 : 0;
1630
1631
if (size.height > 0)
1632
{
1633
Bayer2RGB_EdgeAware_T_Invoker<T, SIMDInterpolator> invoker(src, dst, size, blue, start_with_green);
1634
Range range(0, size.height);
1635
parallel_for_(range, invoker, dst.total()/static_cast<double>(1<<16));
1636
}
1637
size = dst.size();
1638
size.width *= dst.channels();
1639
size_t dstep = dst.step / dst.elemSize1();
1640
T* firstRow = dst.ptr<T>();
1641
T* lastRow = dst.ptr<T>() + (size.height-1) * dstep;
1642
1643
if (size.height > 2)
1644
{
1645
for (int x = 0; x < size.width; ++x)
1646
{
1647
firstRow[x] = (firstRow+dstep)[x];
1648
lastRow[x] = (lastRow-dstep)[x];
1649
}
1650
}
1651
else
1652
for (int x = 0; x < size.width; ++x)
1653
firstRow[x] = lastRow[x] = 0;
1654
}
1655
1656
} // end namespace cv
1657
1658
//////////////////////////////////////////////////////////////////////////////////////////
1659
// The main Demosaicing function //
1660
//////////////////////////////////////////////////////////////////////////////////////////
1661
1662
// Public entry point for Bayer demosaicing.
// Dispatches on `code` to one of three algorithm families:
//  - Bayer2Gray_ / Bayer2RGB_ : simple bilinear interpolation (8U and 16U),
//  - Bayer2RGB_VNG_8u         : variable-number-of-gradients (8U only),
//  - Bayer2RGB_EdgeAware_T    : edge-aware interpolation (8U and 16U).
// @param _src  single-channel CV_8U or CV_16U Bayer mosaic
// @param _dst  output image, created here with `dcn` channels
// @param code  CV_Bayer* color conversion code
// @param dcn   requested output channel count; <= 0 selects the default
//              for the chosen code (1 for gray, 3 for BGR, 4 for BGRA)
void cv::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), dst;
    Size sz = src.size();
    int scn = src.channels(), depth = src.depth();

    CV_Assert(depth == CV_8U || depth == CV_16U);
    CV_Assert(!src.empty());

    switch (code)
    {
    // Bayer -> single gray channel
    case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
        if (dcn <= 0)
            dcn = 1;
        CV_Assert( scn == 1 && dcn == 1 );

        _dst.create(sz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getMat();

        if( depth == CV_8U )
            Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
        else if( depth == CV_16U )
            Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
        else
            CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types");
        break;

    // Bayer -> BGRA: set the 4-channel default, then reuse the BGR path below
    case CV_BayerBG2BGRA: case CV_BayerGB2BGRA: case CV_BayerRG2BGRA: case CV_BayerGR2BGRA:
        if (dcn <= 0)
            dcn = 4;
        /* fallthrough */
    case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
    case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
        {
            if (dcn <= 0)
                dcn = 3;
            CV_Assert( scn == 1 && (dcn == 3 || dcn == 4) );

            _dst.create(sz, CV_MAKE_TYPE(depth, dcn));
            Mat dst_ = _dst.getMat();

            // bilinear codes (BGR and BGRA variants) vs. VNG codes
            if( code == CV_BayerBG2BGR || code == CV_BayerBG2BGRA ||
                code == CV_BayerGB2BGR || code == CV_BayerGB2BGRA ||
                code == CV_BayerRG2BGR || code == CV_BayerRG2BGRA ||
                code == CV_BayerGR2BGR || code == CV_BayerGR2BGRA )
            {
                if( depth == CV_8U )
                    Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code);
                else if( depth == CV_16U )
                    Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code);
                else
                    CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types");
            }
            else
            {
                // VNG is implemented for 8-bit input only
                CV_Assert( depth == CV_8U );
                Bayer2RGB_VNG_8u(src, dst_, code);
            }
        }
        break;

    // Bayer -> BGR, edge-aware variant
    case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
        if (dcn <= 0)
            dcn = 3;

        CV_Assert(scn == 1 && dcn == 3);
        _dst.create(sz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getMat();

        if (depth == CV_8U)
            Bayer2RGB_EdgeAware_T<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
        else if (depth == CV_16U)
            Bayer2RGB_EdgeAware_T<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
        else
            CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB Edge-Aware demosaicing only currently supports 8u and 16u types");

        break;

    default:
        CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" );
    }
}
1746
1747