Path: blob/master/modules/imgproc/src/demosaicing.cpp
16354 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.13// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.14// Copyright (C) 2014, Itseez Inc., all rights reserved.15// Third party copyrights are property of their respective owners.16//17// Redistribution and use in source and binary forms, with or without modification,18// are permitted provided that the following conditions are met:19//20// * Redistribution's of source code must retain the above copyright notice,21// this list of conditions and the following disclaimer.22//23// * Redistribution's in binary form must reproduce the above copyright notice,24// this list of conditions and the following disclaimer in the documentation25// and/or other materials provided with the distribution.26//27// * The name of the copyright holders may not be used to endorse or promote products28// derived from this software without specific prior written permission.29//30// This software is provided by the copyright holders and contributors "as is" and31// any express or implied warranties, including, but not limited to, the implied32// warranties of merchantability and fitness for a particular purpose are disclaimed.33// In no event shall the Intel Corporation or contributors be liable for any direct,34// indirect, incidental, special, exemplary, or consequential damages35// (including, but not limited to, procurement of substitute goods or services;36// loss of use, data, or profits; or business interruption) however caused37// and on any theory of liability, whether in contract, strict liability,38// or tort (including negligence or otherwise) arising in any way out of39// the use of this software, even if advised of the possibility of such damage.40//41//M*/4243/********************************* COPYRIGHT NOTICE *******************************\44Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer45from MD-Mathematische Dienste GmbH. Below is the copyright notice:4647IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.48By downloading, copying, installing or using the software you agree49to this license. If you do not agree to this license, do not download,50install, copy or use the software.5152Contributors License Agreement:5354Copyright (c) 2002,55MD-Mathematische Dienste GmbH56Im Defdahl 5-105744141 Dortmund58Germany59www.md-it.de6061Redistribution and use in source and binary forms,62with or without modification, are permitted provided63that the following conditions are met:6465Redistributions of source code must retain66the above copyright notice, this list of conditions and the following disclaimer.67Redistributions in binary form must reproduce the above copyright notice,68this list of conditions and the following disclaimer in the documentation69and/or other materials provided with the distribution.70The name of Contributor may not be used to endorse or promote products71derived from this software without specific prior written permission.7273THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"74AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,75THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR76PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE77FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL78DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS79OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)80HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,81STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)82ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF83THE POSSIBILITY OF SUCH DAMAGE.84\**********************************************************************************/858687#include "precomp.hpp"8889#include <limits>9091#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))9293namespace cv94{959697//////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////9899template<typename T>100class SIMDBayerStubInterpolator_101{102public:103int bayer2Gray(const T*, int, T*, int, int, int, int) const104{105return 0;106}107108int bayer2RGB(const T*, int, T*, int, int) const109{110return 0;111}112113int bayer2RGBA(const T*, int, T*, int, int) const114{115return 0;116}117118int bayer2RGB_EA(const T*, int, T*, int, int) const119{120return 0;121}122};123124#if CV_SSE2125class SIMDBayerInterpolator_8u126{127public:128SIMDBayerInterpolator_8u()129{130use_simd = checkHardwareSupport(CV_CPU_SSE2);131}132133int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,134int width, int bcoeff, int gcoeff, int rcoeff) const135{136if( !use_simd )137return 0;138139__m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));140__m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));141__m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));142const uchar* bayer_end = bayer + width;143144for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )145{146__m128i r0 = _mm_loadu_si128((const __m128i*)bayer);147__m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));148__m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));149150__m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),151_mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));152__m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));153b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);154155__m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));156__m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);157g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));158g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);159160r0 = _mm_srli_epi16(r1, 8);161r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);162r0 = _mm_slli_epi16(r0, 3);163164g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));165g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));166g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));167g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));168g0 = _mm_srli_epi16(g0, 2);169g1 = _mm_srli_epi16(g1, 2);170g0 = _mm_packus_epi16(g0, g0);171g1 = _mm_packus_epi16(g1, g1);172g0 = _mm_unpacklo_epi8(g0, g1);173_mm_storeu_si128((__m128i*)dst, g0);174}175176return (int)(bayer - (bayer_end - width));177}178179int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const180{181if( !use_simd )182return 0;183/*184B G B G | B G B G | B G B G | B G B G185G R G R | G R G R | G R G R | G R G R186B G B G | B G B G | B G B G | B G B G187*/188189__m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);190__m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();191__m128i masklo = _mm_set1_epi16(0x00ff);192const uchar* bayer_end = bayer + width;193194for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )195{196__m128i r0 = _mm_loadu_si128((const __m128i*)bayer);197__m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));198__m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));199200__m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));201__m128i nextb1 = _mm_srli_si128(b1, 2);202__m128i b0 = _mm_add_epi16(b1, nextb1);203b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);204b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);205// b0 b2 ... b14 b1 b3 ... b15206b0 = _mm_packus_epi16(b0, b1);207208__m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));209__m128i g1 = _mm_and_si128(r1, masklo);210g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));211g1 = _mm_srli_si128(g1, 2);212g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);213// g0 g2 ... g14 g1 g3 ... g15214g0 = _mm_packus_epi16(g0, g1);215216r0 = _mm_srli_epi16(r1, 8);217r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));218r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);219// r0 r2 ... r14 r1 r3 ... r15220r0 = _mm_packus_epi16(r0, r1);221222b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);223b0 = _mm_xor_si128(b0, b1);224r0 = _mm_xor_si128(r0, b1);225226// b1 g1 b3 g3 b5 g5...227b1 = _mm_unpackhi_epi8(b0, g0);228// b0 g0 b2 g2 b4 g4 ....229b0 = _mm_unpacklo_epi8(b0, g0);230231// r1 0 r3 0 r5 0 ...232r1 = _mm_unpackhi_epi8(r0, z);233// r0 0 r2 0 r4 0 ...234r0 = _mm_unpacklo_epi8(r0, z);235236// 0 b0 g0 r0 0 b2 g2 r2 ...237g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);238// 0 b8 g8 r8 0 b10 g10 r10 ...239g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);240241// b1 g1 r1 0 b3 g3 r3 0 ...242r0 = _mm_unpacklo_epi16(b1, r1);243// b9 g9 r9 0 b11 g11 r11 0 ...244r1 = _mm_unpackhi_epi16(b1, r1);245246// 0 b0 g0 r0 b1 g1 r1 0 ...247b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);248// 0 b4 g4 r4 b5 g5 r5 0 ...249b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);250251_mm_storel_epi64((__m128i*)(dst-1+0), b0);252_mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));253_mm_storel_epi64((__m128i*)(dst-1+6*2), b1);254_mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));255256// 0 b8 g8 r8 b9 g9 r9 0 ...257g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);258// 0 b12 g12 r12 b13 g13 r13 0 ...259g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);260261_mm_storel_epi64((__m128i*)(dst-1+6*4), g0);262_mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));263264_mm_storel_epi64((__m128i*)(dst-1+6*6), g1);265}266267return (int)(bayer - (bayer_end - width));268}269270int bayer2RGBA(const uchar*, int, uchar*, int, int) const271{272return 0;273}274275int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const276{277if (!use_simd)278return 0;279280const uchar* bayer_end = bayer + width;281__m128i masklow = _mm_set1_epi16(0x00ff);282__m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);283__m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();284__m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);285286for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)287{288/*289B G B G | B G B G | B G B G | B G B G290G R G R | G R G R | G R G R | G R G R291B G B G | B G B G | B G B G | B G B G292*/293294__m128i r0 = _mm_loadu_si128((const __m128i*)bayer);295__m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));296__m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));297298__m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));299__m128i nextb1 = _mm_srli_si128(b1, 2);300__m128i b0 = _mm_add_epi16(b1, nextb1);301b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);302b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);303// b0 b2 ... b14 b1 b3 ... b15304b0 = _mm_packus_epi16(b0, b1);305306// vertical sum307__m128i r0g = _mm_srli_epi16(r0, 8);308__m128i r2g = _mm_srli_epi16(r2, 8);309__m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);310// gorizontal sum311__m128i g1 = _mm_and_si128(masklow, r1);312__m128i nextg1 = _mm_srli_si128(g1, 2);313__m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);314315// gradients316__m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));317__m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));318__m128i gmask = _mm_cmpgt_epi16(gradg, gradv);319320__m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));321// g0 g2 ... g14 g1 g3 ...322g0 = _mm_packus_epi16(g0, nextg1);323324r0 = _mm_srli_epi16(r1, 8);325r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));326r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);327// r0 r2 ... r14 r1 r3 ... r15328r0 = _mm_packus_epi16(r0, r1);329330b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);331b0 = _mm_xor_si128(b0, b1);332r0 = _mm_xor_si128(r0, b1);333334// b1 g1 b3 g3 b5 g5...335b1 = _mm_unpackhi_epi8(b0, g0);336// b0 g0 b2 g2 b4 g4 ....337b0 = _mm_unpacklo_epi8(b0, g0);338339// r1 0 r3 0 r5 0 ...340r1 = _mm_unpackhi_epi8(r0, z);341// r0 0 r2 0 r4 0 ...342r0 = _mm_unpacklo_epi8(r0, z);343344// 0 b0 g0 r0 0 b2 g2 r2 ...345g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);346// 0 b8 g8 r8 0 b10 g10 r10 ...347g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);348349// b1 g1 r1 0 b3 g3 r3 0 ...350r0 = _mm_unpacklo_epi16(b1, r1);351// b9 g9 r9 0 b11 g11 r11 0 ...352r1 = _mm_unpackhi_epi16(b1, r1);353354// 0 b0 g0 r0 b1 g1 r1 0 ...355b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);356// 0 b4 g4 r4 b5 g5 r5 0 ...357b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);358359_mm_storel_epi64((__m128i*)(dst+0), b0);360_mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));361_mm_storel_epi64((__m128i*)(dst+6*2), b1);362_mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));363364// 0 b8 g8 r8 b9 g9 r9 0 ...365g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);366// 0 b12 g12 r12 b13 g13 r13 0 ...367g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);368369_mm_storel_epi64((__m128i*)(dst+6*4), g0);370_mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));371372_mm_storel_epi64((__m128i*)(dst+6*6), g1);373}374375return int(bayer - (bayer_end - width));376}377378bool use_simd;379};380#elif CV_NEON381class SIMDBayerInterpolator_8u382{383public:384SIMDBayerInterpolator_8u()385{386}387388int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,389int width, int bcoeff, int gcoeff, int rcoeff) const390{391/*392B G B G | B G B G | B G B G | B G B G393G R G R | G R G R | G R G R | G R G R394B G B G | B G B G | B G B G | B G B G395*/396397uint16x8_t masklo = vdupq_n_u16(255);398const uchar* bayer_end = bayer + width;399400for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )401{402uint16x8_t r0 = vld1q_u16((const ushort*)bayer);403uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));404uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));405406uint16x8_t b1_ = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));407uint16x8_t b1 = vextq_u16(b1_, b1_, 1);408uint16x8_t b0 = vaddq_u16(b1_, b1);409// b0 = b0 b2 b4 ...410// b1 = b1 b3 b5 ...411412uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));413uint16x8_t g1 = vandq_u16(r1, masklo);414g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));415uint16x8_t rot = vextq_u16(g1, g1, 1);416g1 = vshlq_n_u16(rot, 2);417// g0 = b0 b2 b4 ...418// g1 = b1 b3 b5 ...419420r0 = vshrq_n_u16(r1, 8);421r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));422r0 = vshlq_n_u16(r0, 2);423// r0 = r0 r2 r4 ...424// r1 = r1 r3 r5 ...425426b0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b0), (short)(rcoeff*2)));427b1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b1), (short)(rcoeff*4)));428429g0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g0), (short)(gcoeff*2)));430g1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g1), (short)(gcoeff*2)));431432r0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r0), (short)(bcoeff*2)));433r1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r1), (short)(bcoeff*4)));434435g0 = vaddq_u16(vaddq_u16(g0, b0), r0);436g1 = vaddq_u16(vaddq_u16(g1, b1), r1);437438uint8x8x2_t p = vzip_u8(vrshrn_n_u16(g0, 2), vrshrn_n_u16(g1, 2));439vst1_u8(dst, p.val[0]);440vst1_u8(dst + 8, p.val[1]);441}442443return (int)(bayer - (bayer_end - width));444}445446int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const447{448/*449B G B G | B G B G | B G B G | B G B G450G R G R | G R G R | G R G R | G R G R451B G B G | B G B G | B G B G | B G B G452*/453uint16x8_t masklo = vdupq_n_u16(255);454uint8x16x3_t pix;455const uchar* bayer_end = bayer + width;456457for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )458{459uint16x8_t r0 = vld1q_u16((const ushort*)bayer);460uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));461uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));462463uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));464uint16x8_t nextb1 = vextq_u16(b1, b1, 1);465uint16x8_t b0 = vaddq_u16(b1, nextb1);466// b0 b1 b2 ...467uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));468pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);469470uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));471uint16x8_t g1 = vandq_u16(r1, masklo);472g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));473g1 = vextq_u16(g1, g1, 1);474// g0 g1 g2 ...475uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));476pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);477478r0 = vshrq_n_u16(r1, 8);479r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));480// r0 r1 r2 ...481uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));482pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);483484vst3q_u8(dst-1, pix);485}486487return (int)(bayer - (bayer_end - width));488}489490int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const491{492/*493B G B G | B G B G | B G B G | B G B G494G R G R | G R G R | G R G R | G R G R495B G B G | B G B G | B G B G | B G B G496*/497uint16x8_t masklo = vdupq_n_u16(255);498uint8x16x4_t pix;499const uchar* bayer_end = bayer + width;500pix.val[3] = vdupq_n_u8(255);501502for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )503{504uint16x8_t r0 = vld1q_u16((const ushort*)bayer);505uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));506uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));507508uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));509uint16x8_t nextb1 = vextq_u16(b1, b1, 1);510uint16x8_t b0 = vaddq_u16(b1, nextb1);511// b0 b1 b2 ...512uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));513pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);514515uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));516uint16x8_t g1 = vandq_u16(r1, masklo);517g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));518g1 = vextq_u16(g1, g1, 1);519// g0 g1 g2 ...520uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));521pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);522523r0 = vshrq_n_u16(r1, 8);524r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));525// r0 r1 r2 ...526uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));527pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);528529vst4q_u8(dst-1, pix);530}531532return (int)(bayer - (bayer_end - width));533}534535int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const536{537return 0;538}539};540#else541typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;542#endif543544545template<typename T, class SIMDInterpolator>546class Bayer2Gray_Invoker :547public ParallelLoopBody548{549public:550Bayer2Gray_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, bool _brow,551const Size& _size, int _bcoeff, int _rcoeff) :552ParallelLoopBody(), srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green),553Brow(_brow), size(_size), Bcoeff(_bcoeff), Rcoeff(_rcoeff)554{555}556557virtual void operator ()(const Range& range) const CV_OVERRIDE558{559SIMDInterpolator vecOp;560const int G2Y = 9617;561const int SHIFT = 14;562563const T* bayer0 = srcmat.ptr<T>();564int bayer_step = (int)(srcmat.step/sizeof(T));565T* dst0 = (T*)dstmat.data;566int dst_step = (int)(dstmat.step/sizeof(T));567int bcoeff = Bcoeff, rcoeff = Rcoeff;568int start_with_green = Start_with_green;569bool brow = Brow;570571dst0 += dst_step + 1;572573if (range.start % 2)574{575brow = !brow;576std::swap(bcoeff, rcoeff);577start_with_green = !start_with_green;578}579580bayer0 += range.start * bayer_step;581dst0 += range.start * dst_step;582583for(int i = range.start ; i < range.end; ++i, bayer0 += bayer_step, dst0 += dst_step )584{585unsigned t0, t1, t2;586const T* bayer = bayer0;587T* dst = dst0;588const T* bayer_end = bayer + size.width;589590if( size.width <= 0 )591{592dst[-1] = dst[size.width] = 0;593continue;594}595596if( start_with_green )597{598t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;599t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;600t2 = bayer[bayer_step+1]*(2*G2Y);601602dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);603bayer++;604dst++;605}606607int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);608bayer += delta;609dst += delta;610611for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )612{613t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;614t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;615t2 = bayer[bayer_step+1]*(4*bcoeff);616dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);617618t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;619t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;620t2 = bayer[bayer_step+2]*(2*G2Y);621dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);622}623624if( bayer < bayer_end )625{626t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;627t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;628t2 = bayer[bayer_step+1]*(4*bcoeff);629dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);630bayer++;631dst++;632}633634dst0[-1] = dst0[0];635dst0[size.width] = dst0[size.width-1];636637brow = !brow;638std::swap(bcoeff, rcoeff);639start_with_green = !start_with_green;640}641}642643private:644Mat srcmat;645Mat dstmat;646int Start_with_green;647bool Brow;648Size size;649int Bcoeff, Rcoeff;650};651652template<typename T, typename SIMDInterpolator>653static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )654{655const int R2Y = 4899;656const int B2Y = 1868;657658Size size = srcmat.size();659int bcoeff = B2Y, rcoeff = R2Y;660int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;661bool brow = true;662663if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )664{665brow = false;666std::swap(bcoeff, rcoeff);667}668size.height -= 2;669size.width -= 2;670671if (size.height > 0)672{673Range range(0, size.height);674Bayer2Gray_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat,675start_with_green, brow, size, bcoeff, rcoeff);676parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));677}678679size = dstmat.size();680T* dst0 = dstmat.ptr<T>();681int dst_step = (int)(dstmat.step/sizeof(T));682if( size.height > 2 )683for( int i = 0; i < size.width; i++ )684{685dst0[i] = dst0[i + dst_step];686dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];687}688else689for( int i = 0; i < size.width; i++ )690dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;691}692693template <typename T>694struct Alpha695{696static T value() { return std::numeric_limits<T>::max(); }697};698699template <>700struct Alpha<float>701{702static float value() { return 1.0f; }703};704705template <typename T, typename SIMDInterpolator>706class Bayer2RGB_Invoker :707public ParallelLoopBody708{709public:710Bayer2RGB_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, int _blue, const Size& _size) :711ParallelLoopBody(),712srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), Blue(_blue), size(_size)713{714}715716virtual void operator() (const Range& range) const CV_OVERRIDE717{718SIMDInterpolator vecOp;719T alpha = Alpha<T>::value();720int dcn = dstmat.channels();721int dcn2 = dcn << 1;722723int bayer_step = (int)(srcmat.step/sizeof(T));724const T* bayer0 = srcmat.ptr<T>() + bayer_step * range.start;725726int dst_step = (int)(dstmat.step/sizeof(T));727T* dst0 = reinterpret_cast<T*>(dstmat.data) + (range.start + 1) * dst_step + dcn + 1;728729int blue = Blue, start_with_green = Start_with_green;730if (range.start % 2)731{732blue = -blue;733start_with_green = !start_with_green;734}735736for (int i = range.start; i < range.end; bayer0 += bayer_step, dst0 += dst_step, ++i )737{738int t0, t1;739const T* bayer = bayer0;740T* dst = dst0;741const T* bayer_end = bayer + size.width;742743// in case of when size.width <= 2744if( size.width <= 0 )745{746if (dcn == 3)747{748dst[-4] = dst[-3] = dst[-2] = dst[size.width*dcn-1] =749dst[size.width*dcn] = dst[size.width*dcn+1] = 0;750}751else752{753dst[-5] = dst[-4] = dst[-3] = dst[size.width*dcn-1] =754dst[size.width*dcn] = dst[size.width*dcn+1] = 0;755dst[-2] = dst[size.width*dcn+2] = alpha;756}757continue;758}759760if( start_with_green )761{762t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;763t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1;764765dst[-blue] = (T)t0;766dst[0] = bayer[bayer_step+1];767dst[blue] = (T)t1;768if (dcn == 4)769dst[2] = alpha; // alpha channel770771bayer++;772dst += dcn;773}774775// simd optimization only for dcn == 3776int delta = dcn == 4 ?777vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :778vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);779bayer += delta;780dst += delta*dcn;781782if (dcn == 3) // Bayer to BGR783{784if( blue > 0 )785{786for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )787{788t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +789bayer[bayer_step*2+2] + 2) >> 2;790t1 = (bayer[1] + bayer[bayer_step] +791bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;792dst[-1] = (T)t0;793dst[0] = (T)t1;794dst[1] = bayer[bayer_step+1];795796t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;797t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;798dst[2] = (T)t0;799dst[3] = bayer[bayer_step+2];800dst[4] = (T)t1;801}802}803else804{805for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )806{807t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +808bayer[bayer_step*2+2] + 2) >> 2;809t1 = (bayer[1] + bayer[bayer_step] +810bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;811dst[1] = (T)t0;812dst[0] = (T)t1;813dst[-1] = bayer[bayer_step+1];814815t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;816t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;817dst[4] = (T)t0;818dst[3] = bayer[bayer_step+2];819dst[2] = (T)t1;820}821}822}823else // Bayer to BGRA824{825// if current row does not contain Blue pixels826if( blue > 0 )827{828for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )829{830t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +831bayer[bayer_step*2+2] + 2) >> 2;832t1 = (bayer[1] + bayer[bayer_step] +833bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;834dst[-1] = (T)t0;835dst[0] = (T)t1;836dst[1] = bayer[bayer_step+1];837dst[2] = alpha; // alpha channel838839t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;840t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;841dst[3] = (T)t0;842dst[4] = bayer[bayer_step+2];843dst[5] = (T)t1;844dst[6] = alpha; // alpha channel845}846}847else // if current row contains Blue pixels848{849for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )850{851t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +852bayer[bayer_step*2+2] + 2) >> 2;853t1 = (bayer[1] + bayer[bayer_step] +854bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;855dst[-1] = bayer[bayer_step+1];856dst[0] = (T)t1;857dst[1] = (T)t0;858dst[2] = alpha; // alpha channel859860t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;861t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;862dst[3] = (T)t1;863dst[4] = bayer[bayer_step+2];864dst[5] = (T)t0;865dst[6] = alpha; // alpha channel866}867}868}869870// if skip one pixel at the end of row871if( bayer < bayer_end )872{873t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +874bayer[bayer_step*2+2] + 2) >> 2;875t1 = (bayer[1] + bayer[bayer_step] +876bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;877dst[-blue] = (T)t0;878dst[0] = (T)t1;879dst[blue] = bayer[bayer_step+1];880if (dcn == 4)881dst[2] = alpha; // alpha channel882bayer++;883dst += dcn;884}885886// fill the last and the first pixels of row accordingly887if (dcn == 3)888{889dst0[-4] = dst0[-1];890dst0[-3] = dst0[0];891dst0[-2] = dst0[1];892dst0[size.width*dcn-1] = dst0[size.width*dcn-4];893dst0[size.width*dcn] = dst0[size.width*dcn-3];894dst0[size.width*dcn+1] = dst0[size.width*dcn-2];895}896else897{898dst0[-5] = dst0[-1];899dst0[-4] = dst0[0];900dst0[-3] = dst0[1];901dst0[-2] = dst0[2]; // alpha channel902dst0[size.width*dcn-1] = dst0[size.width*dcn-5];903dst0[size.width*dcn] = dst0[size.width*dcn-4];904dst0[size.width*dcn+1] = dst0[size.width*dcn-3];905dst0[size.width*dcn+2] = dst0[size.width*dcn-2]; // alpha channel906}907908blue = -blue;909start_with_green = !start_with_green;910}911}912913private:914Mat srcmat;915Mat dstmat;916int Start_with_green, Blue;917Size size;918};919920template<typename T, class SIMDInterpolator>921static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )922{923int dst_step = (int)(dstmat.step/sizeof(T));924Size size = srcmat.size();925int blue = (code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ||926code == CV_BayerBG2BGRA || code == CV_BayerGB2BGRA ) ? -1 : 1;927int start_with_green = (code == CV_BayerGB2BGR || code == CV_BayerGR2BGR ||928code == CV_BayerGB2BGRA || code == CV_BayerGR2BGRA);929930int dcn = dstmat.channels();931size.height -= 2;932size.width -= 2;933934if (size.height > 0)935{936Range range(0, size.height);937Bayer2RGB_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, start_with_green, blue, size);938parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));939}940941// filling the first and the last rows942size = dstmat.size();943T* dst0 = dstmat.ptr<T>();944if( size.height > 2 )945for( int i = 0; i < size.width*dcn; i++ )946{947dst0[i] = dst0[i + dst_step];948dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];949}950else951for( int i = 0; i < size.width*dcn; i++ )952dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;953}954955956/////////////////// Demosaicing using Variable Number of Gradients ///////////////////////957958static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )959{960const uchar* bayer = srcmat.ptr();961int bstep = (int)srcmat.step;962uchar* dst = dstmat.ptr();963int dststep = (int)dstmat.step;964Size size = srcmat.size();965966int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2;967bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG;968969// for too small images use the simple interpolation algorithm970if( MIN(size.width, size.height) < 8 )971{972Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code );973return;974}975976const int brows = 3, bcn = 7;977int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;978int i, bufstep = N7*bcn;979cv::AutoBuffer<ushort> _buf(bufstep*brows);980ushort* buf = _buf.data();981982bayer += bstep*2;983984#if CV_SSE2985bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2);986#define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a))987#endif988989for( int y = 2; y < size.height - 4; y++ )990{991uchar* dstrow = dst + dststep*y + 6;992const uchar* srow;993994for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )995{996ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;997srow = bayer + (y+dy)*bstep + 1;998999for( i = 0; i < bcn; i++ )1000brow[N*i-1] = brow[(N-2) + N*i] = 0;10011002i = 1;10031004#if CV_SSE21005if( haveSSE )1006{1007__m128i z = _mm_setzero_si128();1008for( ; i <= N-9; i += 8, srow += 8, brow += 8 )1009{1010__m128i s1, s2, s3, s4, s6, s7, s8, s9;10111012s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z);1013s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z);1014s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z);10151016s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z);1017s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z);10181019s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z);1020s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z);1021s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z);10221023__m128i b0, b1, b2, b3, b4, b5, b6;10241025b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1),1026_mm_adds_epu16(_mm_absdiff_epu16(s1, s7),1027_mm_absdiff_epu16(s3, s9)));1028b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1),1029_mm_adds_epu16(_mm_absdiff_epu16(s1, s3),1030_mm_absdiff_epu16(s7, s9)));1031b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1);1032b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1);10331034_mm_storeu_si128((__m128i*)brow, b0);1035_mm_storeu_si128((__m128i*)(brow + N), b1);1036_mm_storeu_si128((__m128i*)(brow + N2), b2);1037_mm_storeu_si128((__m128i*)(brow + N3), b3);10381039b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4),1040_mm_absdiff_epu16(s6, s8)));1041b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6),1042_mm_absdiff_epu16(s4, s8)));1043b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8));1044b6 = _mm_srli_epi16(b6, 1);10451046_mm_storeu_si128((__m128i*)(brow + N4), b4);1047_mm_storeu_si128((__m128i*)(brow + N5), b5);1048_mm_storeu_si128((__m128i*)(brow + N6), b6);1049}1050}1051#endif10521053for( ; i < N-1; i++, srow++, brow++ )1054{1055brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +1056std::abs(srow[-bstep] - srow[+bstep])*2 +1057std::abs(srow[1-bstep] - srow[1+bstep]));1058brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +1059std::abs(srow[-1] - srow[1])*2 +1060std::abs(srow[-1+bstep] - srow[1+bstep]));1061brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);1062brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);1063brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +1064std::abs(srow[+bstep] - srow[1]));1065brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +1066std::abs(srow[+bstep] - srow[-1]));1067brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);1068}1069}10701071const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;1072const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;1073const ushort* brow2 = buf + (y % brows)*bufstep + 2;1074static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };1075srow = bayer + y*bstep + 2;1076bool greenCell = greenCell0;10771078i = 2;1079#if CV_SSE21080int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2;1081#else1082int limit = N - 2;1083#endif10841085do1086{1087for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 )1088{1089int gradN = brow0[0] + brow1[0];1090int gradS = brow1[0] + brow2[0];1091int gradW = brow1[N-1] + brow1[N];1092int gradE = brow1[N] + brow1[N+1];1093int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);1094int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);1095int R, G, B;10961097if( !greenCell )1098{1099int gradNE = brow0[N4+1] + brow1[N4];1100int gradSW = brow1[N4] + brow2[N4-1];1101int gradNW = brow0[N5-1] + brow1[N5];1102int gradSE = brow1[N5] + brow2[N5+1];11031104minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);1105maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);1106int T = minGrad + MAX(maxGrad/2, 1);11071108int Rs = 0, Gs = 0, Bs = 0, ng = 0;1109if( gradN < T )1110{1111Rs += srow[-bstep*2] + srow[0];1112Gs += srow[-bstep]*2;1113Bs += srow[-bstep-1] + srow[-bstep+1];1114ng++;1115}1116if( gradS < T )1117{1118Rs += srow[bstep*2] + srow[0];1119Gs += srow[bstep]*2;1120Bs += srow[bstep-1] + srow[bstep+1];1121ng++;1122}1123if( gradW < T )1124{1125Rs += srow[-2] + srow[0];1126Gs += srow[-1]*2;1127Bs += srow[-bstep-1] + srow[bstep-1];1128ng++;1129}1130if( gradE < T )1131{1132Rs += srow[2] + srow[0];1133Gs += srow[1]*2;1134Bs += srow[-bstep+1] + srow[bstep+1];1135ng++;1136}1137if( gradNE < T )1138{1139Rs += srow[-bstep*2+2] + srow[0];1140Gs += brow0[N6+1];1141Bs += srow[-bstep+1]*2;1142ng++;1143}1144if( gradSW < T )1145{1146Rs += srow[bstep*2-2] + srow[0];1147Gs += brow2[N6-1];1148Bs += srow[bstep-1]*2;1149ng++;1150}1151if( gradNW < T )1152{1153Rs += srow[-bstep*2-2] + srow[0];1154Gs += brow0[N6-1];1155Bs += srow[-bstep+1]*2;1156ng++;1157}1158if( gradSE < T )1159{1160Rs += srow[bstep*2+2] + srow[0];1161Gs += brow2[N6+1];1162Bs += srow[-bstep+1]*2;1163ng++;1164}1165R = srow[0];1166G = R + cvRound((Gs - Rs)*scale[ng]);1167B = R + cvRound((Bs - Rs)*scale[ng]);1168}1169else1170{1171int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];1172int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];1173int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];1174int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];11751176minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);1177maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);1178int T = minGrad + MAX(maxGrad/2, 1);11791180int Rs = 0, Gs = 0, Bs = 0, ng = 0;1181if( gradN < T )1182{1183Rs += srow[-bstep*2-1] + srow[-bstep*2+1];1184Gs += srow[-bstep*2] + srow[0];1185Bs += srow[-bstep]*2;1186ng++;1187}1188if( gradS < T )1189{1190Rs += srow[bstep*2-1] + srow[bstep*2+1];1191Gs += srow[bstep*2] + srow[0];1192Bs += srow[bstep]*2;1193ng++;1194}1195if( gradW < T )1196{1197Rs += srow[-1]*2;1198Gs += srow[-2] + srow[0];1199Bs += srow[-bstep-2]+srow[bstep-2];1200ng++;1201}1202if( gradE < T )1203{1204Rs += srow[1]*2;1205Gs += srow[2] + srow[0];1206Bs += srow[-bstep+2]+srow[bstep+2];1207ng++;1208}1209if( gradNE < T )1210{1211Rs += srow[-bstep*2+1] + srow[1];1212Gs += srow[-bstep+1]*2;1213Bs += srow[-bstep] + srow[-bstep+2];1214ng++;1215}1216if( gradSW < T )1217{1218Rs += srow[bstep*2-1] + srow[-1];1219Gs += srow[bstep-1]*2;1220Bs += srow[bstep] + srow[bstep-2];1221ng++;1222}1223if( gradNW < T )1224{1225Rs += srow[-bstep*2-1] + srow[-1];1226Gs += srow[-bstep-1]*2;1227Bs += srow[-bstep-2]+srow[-bstep];1228ng++;1229}1230if( gradSE < T )1231{1232Rs += srow[bstep*2+1] + srow[1];1233Gs += srow[bstep+1]*2;1234Bs += srow[bstep+2]+srow[bstep];1235ng++;1236}1237G = srow[0];1238R = G + cvRound((Rs - Gs)*scale[ng]);1239B = G + cvRound((Bs - Gs)*scale[ng]);1240}1241dstrow[blueIdx] = cv::saturate_cast<uchar>(B);1242dstrow[1] = cv::saturate_cast<uchar>(G);1243dstrow[blueIdx^2] = cv::saturate_cast<uchar>(R);1244greenCell = !greenCell;1245}12461247#if CV_SSE21248if( !haveSSE )1249break;12501251__m128i emask = _mm_set1_epi32(0x0000ffff),1252omask = _mm_set1_epi32(0xffff0000),1253z = _mm_setzero_si128(),1254one = _mm_set1_epi16(1);1255__m128 _0_5 = _mm_set1_ps(0.5f);12561257#define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)1258#define _mm_cvtloepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f)1259#define _mm_cvthiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f)1260#define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts12611262// process 8 pixels at once1263for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 )1264{1265//int gradN = brow0[0] + brow1[0];1266__m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1));12671268//int gradS = brow1[0] + brow2[0];1269__m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2));12701271//int gradW = brow1[N-1] + brow1[N];1272__m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N)));12731274//int gradE = brow1[N+1] + brow1[N];1275__m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N)));12761277//int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);1278//int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);1279__m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE));1280__m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE));12811282__m128i grad0, grad1;12831284//int gradNE = brow0[N4+1] + brow1[N4];1285//int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];1286grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4)));1287grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))),1288_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1))));1289__m128i gradNE = _mm_merge_epi16(grad0, grad1);12901291//int gradSW = brow1[N4] + brow2[N4-1];1292//int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];1293grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4)));1294grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))),1295_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1))));1296__m128i gradSW = _mm_merge_epi16(grad0, grad1);12971298minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW);1299maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW);13001301//int gradNW = brow0[N5-1] + brow1[N5];1302//int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];1303grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5)));1304grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))),1305_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1))));1306__m128i gradNW = _mm_merge_epi16(grad0, grad1);13071308//int gradSE = brow1[N5] + brow2[N5+1];1309//int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];1310grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5)));1311grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))),1312_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1))));1313__m128i gradSE = _mm_merge_epi16(grad0, grad1);13141315minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE);1316maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE);13171318//int T = minGrad + maxGrad/2;1319__m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad);13201321__m128i RGs = z, GRs = z, Bs = z, ng = z;13221323__m128i x0 = _mm_loadl_u8_s16(srow, +0 );1324__m128i x1 = _mm_loadl_u8_s16(srow, -1 - bstep );1325__m128i x2 = _mm_loadl_u8_s16(srow, -1 - bstep*2);1326__m128i x3 = _mm_loadl_u8_s16(srow, - bstep );1327__m128i x4 = _mm_loadl_u8_s16(srow, +1 - bstep*2);1328__m128i x5 = _mm_loadl_u8_s16(srow, +1 - bstep );1329__m128i x6 = _mm_loadl_u8_s16(srow, +2 - bstep );1330__m128i x7 = _mm_loadl_u8_s16(srow, +1 );1331__m128i x8 = _mm_loadl_u8_s16(srow, +2 + bstep );1332__m128i x9 = _mm_loadl_u8_s16(srow, +1 + bstep );1333__m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2);1334__m128i x11 = _mm_loadl_u8_s16(srow, + bstep );1335__m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2);1336__m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep );1337__m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep );1338__m128i x15 = _mm_loadl_u8_s16(srow, -1 );1339__m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep );13401341__m128i t0, t1, mask;13421343// gradN ***********************************************1344mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN1345ng = _mm_sub_epi16(ng, mask); // ng += (T>gradN)13461347t0 = _mm_slli_epi16(x3, 1); // srow[-bstep]*21348t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0); // srow[-bstep*2] + srow[0]13491350// RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)1351RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));1352// GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)1353GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask));1354// Bs += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)1355Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask));13561357// gradNE **********************************************1358mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE1359ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNE)13601361t0 = _mm_slli_epi16(x5, 1); // srow[-bstep+1]*21362t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0); // srow[-bstep*2+2] + srow[0]13631364// RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)1365RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));1366// GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)1367GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask));1368// Bs += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE)1369Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask));13701371// gradE ***********************************************1372mask = _mm_cmpgt_epi16(T, gradE); // mask = T>gradE1373ng = _mm_sub_epi16(ng, mask); // ng += (T>gradE)13741375t0 = _mm_slli_epi16(x7, 1); // srow[1]*21376t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0); // srow[2] + srow[0]13771378// RGs += (srow[2] + srow[0]) * (T>gradE)1379RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));1380// GRs += (srow[1]*2) * (T>gradE)1381GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));1382// Bs += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)1383Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask));13841385// gradSE **********************************************1386mask = _mm_cmpgt_epi16(T, gradSE); // mask = T>gradSE1387ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSE)13881389t0 = _mm_slli_epi16(x9, 1); // srow[bstep+1]*21390t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0); // srow[bstep*2+2] + srow[0]13911392// RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)1393RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));1394// GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)1395GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask));1396// Bs += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)1397Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask));13981399// gradS ***********************************************1400mask = _mm_cmpgt_epi16(T, gradS); // mask = T>gradS1401ng = _mm_sub_epi16(ng, mask); // ng += (T>gradS)14021403t0 = _mm_slli_epi16(x11, 1); // srow[bstep]*21404t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0); // srow[bstep*2]+srow[0]14051406// RGs += (srow[bstep*2]+srow[0]) * (T>gradS)1407RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));1408// GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)1409GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask));1410// Bs += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)1411Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask));14121413// gradSW **********************************************1414mask = _mm_cmpgt_epi16(T, gradSW); // mask = T>gradSW1415ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSW)14161417t0 = _mm_slli_epi16(x13, 1); // srow[bstep-1]*21418t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0); // srow[bstep*2-2]+srow[0]14191420// RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)1421RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));1422// GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)1423GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask));1424// Bs += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)1425Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask));14261427// gradW ***********************************************1428mask = _mm_cmpgt_epi16(T, gradW); // mask = T>gradW1429ng = _mm_sub_epi16(ng, mask); // ng += (T>gradW)14301431t0 = _mm_slli_epi16(x15, 1); // srow[-1]*21432t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0); // srow[-2]+srow[0]14331434// RGs += (srow[-2]+srow[0]) * (T>gradW)1435RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));1436// GRs += (srow[-1]*2) * (T>gradW)1437GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));1438// Bs += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)1439Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask));14401441// gradNW **********************************************1442mask = _mm_cmpgt_epi16(T, gradNW); // mask = T>gradNW1443ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNW)14441445t0 = _mm_slli_epi16(x1, 1); // srow[-bstep-1]*21446t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0); // srow[-bstep*2-2]+srow[0]14471448// RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)1449RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));1450// GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)1451GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask));1452// Bs += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)1453Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask));14541455__m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng));1456__m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng));14571458// now interpolate r, g & b1459t0 = _mm_subs_epi16(GRs, RGs);1460t1 = _mm_subs_epi16(Bs, RGs);14611462t0 = _mm_add_epi16(x0, _mm_packs_epi32(1463_mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)),1464_mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1))));14651466t1 = _mm_add_epi16(x0, _mm_packs_epi32(1467_mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)),1468_mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1))));14691470x1 = _mm_merge_epi16(x0, t0);1471x2 = _mm_merge_epi16(t0, x0);14721473uchar R[8], G[8], B[8];14741475_mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z));1476_mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z));1477_mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z));14781479for( int j = 0; j < 8; j++, dstrow += 3 )1480{1481dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j];1482}1483}1484#endif14851486limit = N - 2;1487}1488while( i < N - 2 );14891490for( i = 0; i < 6; i++ )1491{1492dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i];1493dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i];1494}14951496greenCell0 = !greenCell0;1497blueIdx ^= 2;1498}14991500for( i = 0; i < size.width*3; i++ )1501{1502dst[i] = dst[i + dststep] = dst[i + dststep*2];1503dst[i + dststep*(size.height-4)] =1504dst[i + dststep*(size.height-3)] =1505dst[i + dststep*(size.height-2)] =1506dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)];1507}1508}15091510//////////////////////////////// Edge-Aware Demosaicing //////////////////////////////////15111512template <typename T, typename SIMDInterpolator>1513class Bayer2RGB_EdgeAware_T_Invoker :1514public cv::ParallelLoopBody1515{1516public:1517Bayer2RGB_EdgeAware_T_Invoker(const Mat& _src, Mat& _dst, const Size& _size,1518int _blue, int _start_with_green) :1519ParallelLoopBody(),1520src(_src), dst(_dst), size(_size), Blue(_blue), Start_with_green(_start_with_green)1521{1522}15231524virtual void operator()(const Range& range) const CV_OVERRIDE1525{1526int dcn = dst.channels();1527int dcn2 = dcn<<1;1528int start_with_green = Start_with_green, blue = Blue;1529int sstep = int(src.step / src.elemSize1()), dstep = int(dst.step / dst.elemSize1());1530SIMDInterpolator vecOp;15311532const T* S = src.ptr<T>(range.start + 1) + 1;1533T* D = reinterpret_cast<T*>(dst.data + (range.start + 1) * dst.step) + dcn;15341535if (range.start % 2)1536{1537start_with_green ^= 1;1538blue ^= 1;1539}15401541// to BGR1542for (int y = range.start; y < range.end; ++y)1543{1544int x = 1;1545if (start_with_green)1546{1547D[blue<<1] = (S[-sstep] + S[sstep]) >> 1;1548D[1] = S[0];1549D[2-(blue<<1)] = (S[-1] + S[1]) >> 1;1550D += dcn;1551++S;1552++x;1553}15541555int delta = vecOp.bayer2RGB_EA(S - sstep - 1, sstep, D, size.width, blue);1556x += delta;1557S += delta;1558D += dcn * delta;15591560if (blue)1561for (; x < size.width; x += 2, S += 2, D += dcn2)1562{1563D[0] = S[0];1564D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;1565D[2] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1]) >> 2;15661567D[3] = (S[0] + S[2] + 1) >> 1;1568D[4] = S[1];1569D[5] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;1570}1571else1572for (; x < size.width; x += 2, S += 2, D += dcn2)1573{1574D[0] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;1575D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;1576D[2] = S[0];15771578D[3] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;1579D[4] = S[1];1580D[5] = (S[0] + S[2] + 1) >> 1;1581}15821583if (x <= size.width)1584{1585D[blue<<1] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;1586D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;1587D[2-(blue<<1)] = S[0];1588D += dcn;1589++S;1590}15911592for (int i = 0; i < dcn; ++i)1593{1594D[i] = D[-dcn + i];1595D[-dstep+dcn+i] = D[-dstep+(dcn<<1)+i];1596}15971598start_with_green ^= 1;1599blue ^= 1;1600S += 2;1601D += dcn2;1602}1603}16041605private:1606Mat src;1607Mat dst;1608Size size;1609int Blue, Start_with_green;1610};16111612template <typename T, typename SIMDInterpolator>1613static void Bayer2RGB_EdgeAware_T(const Mat& src, Mat& dst, int code)1614{1615Size size = src.size();16161617// for small sizes1618if (size.width <= 2 || size.height <= 2)1619{1620dst = Scalar::all(0);1621return;1622}16231624size.width -= 2;1625size.height -= 2;16261627int start_with_green = code == CV_BayerGB2BGR_EA || code == CV_BayerGR2BGR_EA ? 1 : 0;1628int blue = code == CV_BayerGB2BGR_EA || code == CV_BayerBG2BGR_EA ? 1 : 0;16291630if (size.height > 0)1631{1632Bayer2RGB_EdgeAware_T_Invoker<T, SIMDInterpolator> invoker(src, dst, size, blue, start_with_green);1633Range range(0, size.height);1634parallel_for_(range, invoker, dst.total()/static_cast<double>(1<<16));1635}1636size = dst.size();1637size.width *= dst.channels();1638size_t dstep = dst.step / dst.elemSize1();1639T* firstRow = dst.ptr<T>();1640T* lastRow = dst.ptr<T>() + (size.height-1) * dstep;16411642if (size.height > 2)1643{1644for (int x = 0; x < size.width; ++x)1645{1646firstRow[x] = (firstRow+dstep)[x];1647lastRow[x] = (lastRow-dstep)[x];1648}1649}1650else1651for (int x = 0; x < size.width; ++x)1652firstRow[x] = lastRow[x] = 0;1653}16541655} // end namespace cv16561657//////////////////////////////////////////////////////////////////////////////////////////1658// The main Demosaicing function //1659//////////////////////////////////////////////////////////////////////////////////////////16601661void cv::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn)1662{1663CV_INSTRUMENT_REGION();16641665Mat src = _src.getMat(), dst;1666Size sz = src.size();1667int scn = src.channels(), depth = src.depth();16681669CV_Assert(depth == CV_8U || depth == CV_16U);1670CV_Assert(!src.empty());16711672switch (code)1673{1674case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:1675if (dcn <= 0)1676dcn = 1;1677CV_Assert( scn == 1 && dcn == 1 );16781679_dst.create(sz, CV_MAKETYPE(depth, dcn));1680dst = _dst.getMat();16811682if( depth == CV_8U )1683Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);1684else if( depth == CV_16U )1685Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);1686else1687CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types");1688break;16891690case CV_BayerBG2BGRA: case CV_BayerGB2BGRA: case CV_BayerRG2BGRA: case CV_BayerGR2BGRA:1691if (dcn <= 0)1692dcn = 4;1693/* fallthrough */1694case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:1695case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:1696{1697if (dcn <= 0)1698dcn = 3;1699CV_Assert( scn == 1 && (dcn == 3 || dcn == 4) );17001701_dst.create(sz, CV_MAKE_TYPE(depth, dcn));1702Mat dst_ = _dst.getMat();17031704if( code == CV_BayerBG2BGR || code == CV_BayerBG2BGRA ||1705code == CV_BayerGB2BGR || code == CV_BayerGB2BGRA ||1706code == CV_BayerRG2BGR || code == CV_BayerRG2BGRA ||1707code == CV_BayerGR2BGR || code == CV_BayerGR2BGRA )1708{1709if( depth == CV_8U )1710Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code);1711else if( depth == CV_16U )1712Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code);1713else1714CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types");1715}1716else1717{1718CV_Assert( depth == CV_8U );1719Bayer2RGB_VNG_8u(src, dst_, code);1720}1721}1722break;17231724case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:1725if (dcn <= 0)1726dcn = 3;17271728CV_Assert(scn == 1 && dcn == 3);1729_dst.create(sz, CV_MAKETYPE(depth, dcn));1730dst = _dst.getMat();17311732if (depth == CV_8U)1733Bayer2RGB_EdgeAware_T<uchar, SIMDBayerInterpolator_8u>(src, dst, code);1734else if (depth == CV_16U)1735Bayer2RGB_EdgeAware_T<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);1736else1737CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB Edge-Aware demosaicing only currently supports 8u and 16u types");17381739break;17401741default:1742CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" );1743}1744}174517461747