Path: blob/master/modules/gapi/src/backends/fluid/gfluidcore.cpp
16345 views
// This file is part of OpenCV project.1// It is subject to the license terms in the LICENSE file found in the top-level directory2// of this distribution and at http://opencv.org/license.html.3//4// Copyright (C) 2018 Intel Corporation56#if !defined(GAPI_STANDALONE)78#include "precomp.hpp"910#include "opencv2/gapi/own/assert.hpp"11#include "opencv2/core/traits.hpp"12#include "opencv2/core/hal/intrin.hpp"1314#include "opencv2/gapi/core.hpp"1516#include "opencv2/gapi/fluid/gfluidbuffer.hpp"17#include "opencv2/gapi/fluid/gfluidkernel.hpp"1819#include "gfluidbuffer_priv.hpp"20#include "gfluidbackend.hpp"21#include "gfluidutils.hpp"22#include "gfluidcore.hpp"2324#include <cassert>25#include <cmath>26#include <cstdlib>2728namespace cv {29namespace gapi {30namespace fluid {3132//---------------------33//34// Arithmetic functions35//36//---------------------3738template<typename DST, typename SRC1, typename SRC2>39static inline DST absdiff(SRC1 x, SRC2 y)40{41auto result = x > y? x - y: y - x;42return saturate<DST>(result, roundf);43}4445template<typename DST, typename SRC1, typename SRC2>46static inline DST addWeighted(SRC1 src1, SRC2 src2, float alpha, float beta, float gamma)47{48float dst = src1*alpha + src2*beta + gamma;49return saturate<DST>(dst, roundf);50}5152template<typename DST, typename SRC1, typename SRC2>53static inline DST add(SRC1 x, SRC2 y)54{55return saturate<DST>(x + y, roundf);56}5758template<typename DST, typename SRC1, typename SRC2>59static inline DST sub(SRC1 x, SRC2 y)60{61return saturate<DST>(x - y, roundf);62}6364template<typename DST, typename SRC1, typename SRC2>65static inline DST subr(SRC1 x, SRC2 y)66{67return saturate<DST>(y - x, roundf); // reverse: y - x68}6970template<typename DST, typename SRC1, typename SRC2>71static inline DST mul(SRC1 x, SRC2 y, float scale=1)72{73auto result = scale * x * y;74return saturate<DST>(result, rintf);75}7677template<typename DST, typename SRC1, typename SRC2>78static inline DST div(SRC1 x, SRC2 y, float scale=1)79{80// like OpenCV: returns 0, if y=081auto result = y? scale * x / y: 0;82return saturate<DST>(result, rintf);83}8485template<typename DST, typename SRC1, typename SRC2>86static inline DST divr(SRC1 x, SRC2 y, float scale=1)87{88auto result = x? scale * y / x: 0; // reverse: y / x89return saturate<DST>(result, rintf);90}9192//---------------------------93//94// Fluid kernels: addWeighted95//96//---------------------------9798template<typename DST, typename SRC1, typename SRC2>99static void run_addweighted(Buffer &dst, const View &src1, const View &src2,100double alpha, double beta, double gamma)101{102static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");103104const auto *in1 = src1.InLine<SRC1>(0);105const auto *in2 = src2.InLine<SRC2>(0);106auto *out = dst.OutLine<DST>();107108int width = dst.length();109int chan = dst.meta().chan;110int length = width * chan;111112// NB: assume in/out types are not 64-bits113auto _alpha = static_cast<float>( alpha );114auto _beta = static_cast<float>( beta );115auto _gamma = static_cast<float>( gamma );116117for (int l=0; l < length; l++)118out[l] = addWeighted<DST>(in1[l], in2[l], _alpha, _beta, _gamma);119}120121GAPI_FLUID_KERNEL(GFluidAddW, cv::gapi::core::GAddW, false)122{123static const int Window = 1;124125static void run(const View &src1, double alpha, const View &src2,126double beta, double gamma, int /*dtype*/,127Buffer &dst)128{129// DST SRC1 SRC2 OP __VA_ARGS__130BINARY_(uchar , uchar , uchar , run_addweighted, dst, src1, src2, alpha, beta, gamma);131BINARY_(uchar , ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);132BINARY_(uchar , short, short, run_addweighted, dst, src1, src2, alpha, beta, gamma);133BINARY_( short, short, short, run_addweighted, dst, src1, src2, alpha, beta, gamma);134BINARY_(ushort, ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);135BINARY_( float, uchar , uchar , run_addweighted, dst, src1, src2, alpha, beta, gamma);136BINARY_( float, ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);137BINARY_( float, short, short, run_addweighted, dst, src1, src2, alpha, beta, gamma);138139CV_Error(cv::Error::StsBadArg, "unsupported combination of types");140}141};142143//--------------------------144//145// Fluid kernels: +, -, *, /146//147//--------------------------148149enum Arithm { ARITHM_ABSDIFF, ARITHM_ADD, ARITHM_SUBTRACT, ARITHM_MULTIPLY, ARITHM_DIVIDE };150151template<typename DST, typename SRC1, typename SRC2>152static void run_arithm(Buffer &dst, const View &src1, const View &src2, Arithm arithm,153double scale=1)154{155static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");156157const auto *in1 = src1.InLine<SRC1>(0);158const auto *in2 = src2.InLine<SRC2>(0);159auto *out = dst.OutLine<DST>();160161int width = dst.length();162int chan = dst.meta().chan;163int length = width * chan;164165// NB: assume in/out types are not 64-bits166float _scale = static_cast<float>( scale );167168switch (arithm)169{170case ARITHM_ABSDIFF:171for (int l=0; l < length; l++)172out[l] = absdiff<DST>(in1[l], in2[l]);173break;174case ARITHM_ADD:175for (int l=0; l < length; l++)176out[l] = add<DST>(in1[l], in2[l]);177break;178case ARITHM_SUBTRACT:179for (int l=0; l < length; l++)180out[l] = sub<DST>(in1[l], in2[l]);181break;182case ARITHM_MULTIPLY:183for (int l=0; l < length; l++)184out[l] = mul<DST>(in1[l], in2[l], _scale);185break;186case ARITHM_DIVIDE:187for (int l=0; l < length; l++)188out[l] = div<DST>(in1[l], in2[l], _scale);189break;190default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");191}192}193194GAPI_FLUID_KERNEL(GFluidAdd, cv::gapi::core::GAdd, false)195{196static const int Window = 1;197198static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst)199{200// DST SRC1 SRC2 OP __VA_ARGS__201BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD);202BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_ADD);203BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_ADD);204BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ADD);205BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD);206BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_ADD);207BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ADD);208209CV_Error(cv::Error::StsBadArg, "unsupported combination of types");210}211};212213GAPI_FLUID_KERNEL(GFluidSub, cv::gapi::core::GSub, false)214{215static const int Window = 1;216217static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst)218{219// DST SRC1 SRC2 OP __VA_ARGS__220BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT);221BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);222BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);223BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);224BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT);225BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);226BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);227228CV_Error(cv::Error::StsBadArg, "unsupported combination of types");229}230};231232GAPI_FLUID_KERNEL(GFluidMul, cv::gapi::core::GMul, false)233{234static const int Window = 1;235236static void run(const View &src1, const View &src2, double scale, int /*dtype*/, Buffer &dst)237{238// DST SRC1 SRC2 OP __VA_ARGS__239BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);240BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);241BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);242BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);243BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);244BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);245BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);246247CV_Error(cv::Error::StsBadArg, "unsupported combination of types");248}249};250251GAPI_FLUID_KERNEL(GFluidDiv, cv::gapi::core::GDiv, false)252{253static const int Window = 1;254255static void run(const View &src1, const View &src2, double scale, int /*dtype*/, Buffer &dst)256{257// DST SRC1 SRC2 OP __VA_ARGS__258BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);259BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);260BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);261BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);262BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);263BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);264BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);265266CV_Error(cv::Error::StsBadArg, "unsupported combination of types");267}268};269270GAPI_FLUID_KERNEL(GFluidAbsDiff, cv::gapi::core::GAbsDiff, false)271{272static const int Window = 1;273274static void run(const View &src1, const View &src2, Buffer &dst)275{276// DST SRC1 SRC2 OP __VA_ARGS__277BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ABSDIFF);278BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);279BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);280BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);281282CV_Error(cv::Error::StsBadArg, "unsupported combination of types");283}284};285286//--------------------------------------287//288// Fluid kernels: +, -, *, / with Scalar289//290//--------------------------------------291292static inline v_uint16x8 v_add_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x + y; }293static inline v_uint16x8 v_sub_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x - y; }294static inline v_uint16x8 v_subr_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return y - x; }295296static inline v_float32x4 v_add_32f(const v_float32x4 &x, const v_float32x4 &y) { return x + y; }297static inline v_float32x4 v_sub_32f(const v_float32x4 &x, const v_float32x4 &y) { return x - y; }298static inline v_float32x4 v_subr_32f(const v_float32x4 &x, const v_float32x4 &y) { return y - x; }299300static inline int s_add_8u(uchar x, uchar y) { return x + y; }301static inline int s_sub_8u(uchar x, uchar y) { return x - y; }302static inline int s_subr_8u(uchar x, uchar y) { return y - x; }303304static inline float s_add_32f(float x, float y) { return x + y; }305static inline float s_sub_32f(float x, float y) { return x - y; }306static inline float s_subr_32f(float x, float y) { return y - x; }307308// manual SIMD if important case 8UC3309static void run_arithm_s3(uchar out[], const uchar in[], int width, const uchar scalar[],310v_uint16x8 (*v_op)(const v_uint16x8&, const v_uint16x8&),311int (*s_op)(uchar, uchar))312{313int w = 0;314315#if CV_SIMD128316for (; w <= width-16; w+=16)317{318v_uint8x16 x, y, z;319v_load_deinterleave(&in[3*w], x, y, z);320321v_uint16x8 r0, r1;322323v_expand(x, r0, r1);324r0 = v_op(r0, v_setall_u16(scalar[0])); // x + scalar[0]325r1 = v_op(r1, v_setall_u16(scalar[0]));326x = v_pack(r0, r1);327328v_expand(y, r0, r1);329r0 = v_op(r0, v_setall_u16(scalar[1])); // y + scalar[1]330r1 = v_op(r1, v_setall_u16(scalar[1]));331y = v_pack(r0, r1);332333v_expand(z, r0, r1);334r0 = v_op(r0, v_setall_u16(scalar[2])); // z + scalar[2]335r1 = v_op(r1, v_setall_u16(scalar[2]));336z = v_pack(r0, r1);337338v_store_interleave(&out[3*w], x, y, z);339}340#endif341UNUSED(v_op);342for (; w < width; w++)343{344out[3*w ] = saturate<uchar>( s_op(in[3*w ], scalar[0]) );345out[3*w + 1] = saturate<uchar>( s_op(in[3*w + 1], scalar[1]) );346out[3*w + 2] = saturate<uchar>( s_op(in[3*w + 2], scalar[2]) );347}348}349350// manually SIMD if rounding 32F into 8U, single channel351static void run_arithm_s1(uchar out[], const float in[], int width, const float scalar[],352v_float32x4 (*v_op)(const v_float32x4&, const v_float32x4&),353float (*s_op)(float, float))354{355int w = 0;356357#if CV_SIMD128358for (; w <= width-16; w+=16)359{360v_float32x4 r0, r1, r2, r3;361r0 = v_load(&in[w ]);362r1 = v_load(&in[w + 4]);363r2 = v_load(&in[w + 8]);364r3 = v_load(&in[w + 12]);365366r0 = v_op(r0, v_setall_f32(scalar[0])); // r + scalar[0]367r1 = v_op(r1, v_setall_f32(scalar[0]));368r2 = v_op(r2, v_setall_f32(scalar[0]));369r3 = v_op(r3, v_setall_f32(scalar[0]));370371v_int32x4 i0, i1, i2, i3;372i0 = v_round(r0);373i1 = v_round(r1);374i2 = v_round(r2);375i3 = v_round(r3);376377v_uint16x8 us0, us1;378us0 = v_pack_u(i0, i1);379us1 = v_pack_u(i2, i3);380381v_uint8x16 uc;382uc = v_pack(us0, us1);383384v_store(&out[w], uc);385}386#endif387UNUSED(v_op);388for (; w < width; w++)389{390out[w] = saturate<uchar>(s_op(in[w], scalar[0]), std::roundf);391}392}393394static void run_arithm_s_add3(uchar out[], const uchar in[], int width, const uchar scalar[])395{396run_arithm_s3(out, in, width, scalar, v_add_16u, s_add_8u);397}398399static void run_arithm_s_sub3(uchar out[], const uchar in[], int width, const uchar scalar[])400{401run_arithm_s3(out, in, width, scalar, v_sub_16u, s_sub_8u);402}403404static void run_arithm_s_subr3(uchar out[], const uchar in[], int width, const uchar scalar[])405{406run_arithm_s3(out, in, width, scalar, v_subr_16u, s_subr_8u); // reverse: subr407}408409static void run_arithm_s_add1(uchar out[], const float in[], int width, const float scalar[])410{411run_arithm_s1(out, in, width, scalar, v_add_32f, s_add_32f);412}413414static void run_arithm_s_sub1(uchar out[], const float in[], int width, const float scalar[])415{416run_arithm_s1(out, in, width, scalar, v_sub_32f, s_sub_32f);417}418419static void run_arithm_s_subr1(uchar out[], const float in[], int width, const float scalar[])420{421run_arithm_s1(out, in, width, scalar, v_subr_32f, s_subr_32f); // reverse: subr422}423424// manually unroll the inner cycle by channels425template<typename DST, typename SRC, typename SCALAR, typename FUNC>426static void run_arithm_s(DST out[], const SRC in[], int width, int chan,427const SCALAR scalar[4], FUNC func)428{429if (chan == 4)430{431for (int w=0; w < width; w++)432{433out[4*w + 0] = func(in[4*w + 0], scalar[0]);434out[4*w + 1] = func(in[4*w + 1], scalar[1]);435out[4*w + 2] = func(in[4*w + 2], scalar[2]);436out[4*w + 3] = func(in[4*w + 3], scalar[3]);437}438}439else440if (chan == 3)441{442for (int w=0; w < width; w++)443{444out[3*w + 0] = func(in[3*w + 0], scalar[0]);445out[3*w + 1] = func(in[3*w + 1], scalar[1]);446out[3*w + 2] = func(in[3*w + 2], scalar[2]);447}448}449else450if (chan == 2)451{452for (int w=0; w < width; w++)453{454out[2*w + 0] = func(in[2*w + 0], scalar[0]);455out[2*w + 1] = func(in[2*w + 1], scalar[1]);456}457}458else459if (chan == 1)460{461for (int w=0; w < width; w++)462{463out[w] = func(in[w], scalar[0]);464}465}466else467CV_Error(cv::Error::StsBadArg, "unsupported number of channels");468}469470template<typename DST, typename SRC>471static void run_arithm_s(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,472float scale=1)473{474const auto *in = src.InLine<SRC>(0);475auto *out = dst.OutLine<DST>();476477int width = dst.length();478int chan = dst.meta().chan;479480// What if we cast the scalar into the SRC type?481const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),482static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };483bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&484(myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);485486switch (arithm)487{488case ARITHM_ABSDIFF:489for (int w=0; w < width; w++)490for (int c=0; c < chan; c++)491out[chan*w + c] = absdiff<DST>(in[chan*w + c], scalar[c]);492break;493case ARITHM_ADD:494if (usemyscal)495{496if (std::is_same<DST,uchar>::value &&497std::is_same<SRC,uchar>::value &&498chan == 3)499run_arithm_s_add3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);500else if (std::is_same<DST,uchar>::value &&501std::is_same<SRC,float>::value &&502chan == 1)503run_arithm_s_add1((uchar*)out, (const float*)in, width, (const float*)myscal);504else505run_arithm_s(out, in, width, chan, myscal, add<DST,SRC,SRC>);506}507else508run_arithm_s(out, in, width, chan, scalar, add<DST,SRC,float>);509break;510case ARITHM_SUBTRACT:511if (usemyscal)512{513if (std::is_same<DST,uchar>::value &&514std::is_same<SRC,uchar>::value &&515chan == 3)516run_arithm_s_sub3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);517else if (std::is_same<DST,uchar>::value &&518std::is_same<SRC,float>::value &&519chan == 1)520run_arithm_s_sub1((uchar*)out, (const float*)in, width, (const float*)myscal);521else522run_arithm_s(out, in, width, chan, myscal, sub<DST,SRC,SRC>);523}524else525run_arithm_s(out, in, width, chan, scalar, sub<DST,SRC,float>);526break;527// TODO: optimize miltiplication and division528case ARITHM_MULTIPLY:529for (int w=0; w < width; w++)530for (int c=0; c < chan; c++)531out[chan*w + c] = mul<DST>(in[chan*w + c], scalar[c], scale);532break;533case ARITHM_DIVIDE:534for (int w=0; w < width; w++)535for (int c=0; c < chan; c++)536out[chan*w + c] = div<DST>(in[chan*w + c], scalar[c], scale);537break;538default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");539}540}541542template<typename DST, typename SRC>543static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,544float scale=1)545{546const auto *in = src.InLine<SRC>(0);547auto *out = dst.OutLine<DST>();548549int width = dst.length();550int chan = dst.meta().chan;551552// What if we cast the scalar into the SRC type?553const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),554static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };555bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&556(myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);557558switch (arithm)559{560case ARITHM_SUBTRACT:561if (usemyscal)562{563if (std::is_same<DST,uchar>::value &&564std::is_same<SRC,uchar>::value &&565chan == 3)566run_arithm_s_subr3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);567else if (std::is_same<DST,uchar>::value &&568std::is_same<SRC,float>::value &&569chan == 1)570run_arithm_s_subr1((uchar*)out, (const float*)in, width, (const float*)myscal);571else572run_arithm_s(out, in, width, chan, myscal, subr<DST,SRC,SRC>);573}574else575run_arithm_s(out, in, width, chan, scalar, subr<DST,SRC,float>);576break;577// TODO: optimize division578case ARITHM_DIVIDE:579for (int w=0; w < width; w++)580for (int c=0; c < chan; c++)581out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale);582break;583default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");584}585}586587GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, false)588{589static const int Window = 1;590591static void run(const View &src, const cv::Scalar &_scalar, Buffer &dst)592{593const float scalar[4] = {594static_cast<float>(_scalar[0]),595static_cast<float>(_scalar[1]),596static_cast<float>(_scalar[2]),597static_cast<float>(_scalar[3])598};599600// DST SRC OP __VA_ARGS__601UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);602UNARY_(ushort, ushort, run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);603UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);604605CV_Error(cv::Error::StsBadArg, "unsupported combination of types");606}607};608609GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, false)610{611static const int Window = 1;612613static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)614{615const float scalar[4] = {616static_cast<float>(_scalar[0]),617static_cast<float>(_scalar[1]),618static_cast<float>(_scalar[2]),619static_cast<float>(_scalar[3])620};621622// DST SRC OP __VA_ARGS__623UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_ADD);624UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_ADD);625UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_ADD);626UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_ADD);627UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_ADD);628UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_ADD);629UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_ADD);630631CV_Error(cv::Error::StsBadArg, "unsupported combination of types");632}633};634635GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, false)636{637static const int Window = 1;638639static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)640{641const float scalar[4] = {642static_cast<float>(_scalar[0]),643static_cast<float>(_scalar[1]),644static_cast<float>(_scalar[2]),645static_cast<float>(_scalar[3])646};647648// DST SRC OP __VA_ARGS__649UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);650UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);651UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);652UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);653UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);654UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);655UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);656657CV_Error(cv::Error::StsBadArg, "unsupported combination of types");658}659};660661GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, false)662{663static const int Window = 1;664665static void run(const cv::Scalar &_scalar, const View &src, int /*dtype*/, Buffer &dst)666{667const float scalar[4] = {668static_cast<float>(_scalar[0]),669static_cast<float>(_scalar[1]),670static_cast<float>(_scalar[2]),671static_cast<float>(_scalar[3])672};673674// DST SRC OP __VA_ARGS__675UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);676UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);677UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);678UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);679UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);680UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);681UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);682683CV_Error(cv::Error::StsBadArg, "unsupported combination of types");684}685};686687GAPI_FLUID_KERNEL(GFluidMulC, cv::gapi::core::GMulC, false)688{689static const int Window = 1;690691static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)692{693const float scalar[4] = {694static_cast<float>(_scalar[0]),695static_cast<float>(_scalar[1]),696static_cast<float>(_scalar[2]),697static_cast<float>(_scalar[3])698};699const float scale = 1.f;700701// DST SRC OP __VA_ARGS__702UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);703UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);704UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);705UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);706UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);707UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);708UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);709710CV_Error(cv::Error::StsBadArg, "unsupported combination of types");711}712};713714GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, false)715{716static const int Window = 1;717718static void run(const View &src, double _scalar, int /*dtype*/, Buffer &dst)719{720const float scalar[4] = {721static_cast<float>(_scalar),722static_cast<float>(_scalar),723static_cast<float>(_scalar),724static_cast<float>(_scalar)725};726const float scale = 1.f;727728// DST SRC OP __VA_ARGS__729UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);730UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);731UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);732UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);733UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);734UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);735UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);736737CV_Error(cv::Error::StsBadArg, "unsupported combination of types");738}739};740741GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, false)742{743static const int Window = 1;744745static void run(const View &src, const cv::Scalar &_scalar, double _scale, int /*dtype*/,746Buffer &dst)747{748const float scalar[4] = {749static_cast<float>(_scalar[0]),750static_cast<float>(_scalar[1]),751static_cast<float>(_scalar[2]),752static_cast<float>(_scalar[3])753};754const float scale = static_cast<float>(_scale);755756// DST SRC OP __VA_ARGS__757UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);758UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);759UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);760UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);761UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);762UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);763UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);764765CV_Error(cv::Error::StsBadArg, "unsupported combination of types");766}767};768769GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)770{771static const int Window = 1;772773static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/,774Buffer &dst)775{776const float scalar[4] = {777static_cast<float>(_scalar[0]),778static_cast<float>(_scalar[1]),779static_cast<float>(_scalar[2]),780static_cast<float>(_scalar[3])781};782const float scale = static_cast<float>(_scale);783784// DST SRC OP __VA_ARGS__785UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);786UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);787UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);788UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);789UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);790UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);791UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);792793CV_Error(cv::Error::StsBadArg, "unsupported combination of types");794}795};796797//----------------------------798//799// Fluid math kernels: bitwise800//801//----------------------------802803enum Bitwise { BW_AND, BW_OR, BW_XOR, BW_NOT };804805template<typename DST, typename SRC1, typename SRC2>806static void run_bitwise2(Buffer &dst, const View &src1, const View &src2, Bitwise bitwise)807{808static_assert(std::is_same<DST, SRC1>::value, "wrong types");809static_assert(std::is_same<DST, SRC2>::value, "wrong types");810811const auto *in1 = src1.InLine<SRC1>(0);812const auto *in2 = src2.InLine<SRC2>(0);813auto *out = dst.OutLine<DST>();814815int width = dst.length();816int chan = dst.meta().chan;817int length = width * chan;818819switch (bitwise)820{821case BW_AND:822for (int l=0; l < length; l++)823out[l] = in1[l] & in2[l];824break;825case BW_OR:826for (int l=0; l < length; l++)827out[l] = in1[l] | in2[l];828break;829case BW_XOR:830for (int l=0; l < length; l++)831out[l] = in1[l] ^ in2[l];832break;833default: CV_Error(cv::Error::StsBadArg, "unsupported bitwise operation");834}835}836837template<typename DST, typename SRC>838static void run_bitwise1(Buffer &dst, const View &src, Bitwise bitwise)839{840static_assert(std::is_same<DST, SRC>::value, "wrong types");841842const auto *in = src.InLine<SRC>(0);843auto *out = dst.OutLine<DST>();844845int width = dst.length();846int chan = dst.meta().chan;847int length = width * chan;848849switch (bitwise)850{851case BW_NOT:852for (int l=0; l < length; l++)853out[l] = ~in[l];854break;855default: CV_Error(cv::Error::StsBadArg, "unsupported bitwise operation");856}857}858859GAPI_FLUID_KERNEL(GFluidAnd, cv::gapi::core::GAnd, false)860{861static const int Window = 1;862863static void run(const View &src1, const View &src2, Buffer &dst)864{865866// DST SRC1 SRC2 OP __VA_ARGS__867BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_AND);868BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_AND);869BINARY_( short, short, short, run_bitwise2, dst, src1, src2, BW_AND);870871CV_Error(cv::Error::StsBadArg, "unsupported combination of types");872}873};874875GAPI_FLUID_KERNEL(GFluidOr, cv::gapi::core::GOr, false)876{877static const int Window = 1;878879static void run(const View &src1, const View &src2, Buffer &dst)880{881882// DST SRC1 SRC2 OP __VA_ARGS__883BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_OR);884BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_OR);885BINARY_( short, short, short, run_bitwise2, dst, src1, src2, BW_OR);886887CV_Error(cv::Error::StsBadArg, "unsupported combination of types");888}889};890891GAPI_FLUID_KERNEL(GFluidXor, cv::gapi::core::GXor, false)892{893static const int Window = 1;894895static void run(const View &src1, const View &src2, Buffer &dst)896{897898// DST SRC1 SRC2 OP __VA_ARGS__899BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_XOR);900BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_XOR);901BINARY_( short, short, short, run_bitwise2, dst, src1, src2, BW_XOR);902903CV_Error(cv::Error::StsBadArg, "unsupported combination of types");904}905};906907GAPI_FLUID_KERNEL(GFluidNot, cv::gapi::core::GNot, false)908{909static const int Window = 1;910911static void run(const View &src, Buffer &dst)912{913// DST SRC OP __VA_ARGS__914UNARY_(uchar , uchar , run_bitwise1, dst, src, BW_NOT);915UNARY_(ushort, ushort, run_bitwise1, dst, src, BW_NOT);916UNARY_( short, short, run_bitwise1, dst, src, BW_NOT);917918CV_Error(cv::Error::StsBadArg, "unsupported combination of types");919}920};921922//-------------------923//924// Fluid kernels: LUT925//926//-------------------927928GAPI_FLUID_KERNEL(GFluidLUT, cv::gapi::core::GLUT, false)929{930static const int Window = 1;931932static void run(const View &src, const cv::Mat& lut, Buffer &dst)933{934GAPI_Assert(CV_8U == dst.meta().depth);935GAPI_Assert(CV_8U == src.meta().depth);936937GAPI_DbgAssert(CV_8U == lut.type());938GAPI_DbgAssert(256 == lut.cols * lut.rows);939GAPI_DbgAssert(dst.length() == src.length());940GAPI_DbgAssert(dst.meta().chan == src.meta().chan);941942const auto *in = src.InLine<uchar>(0);943auto *out = dst.OutLine<uchar>();944945int width = dst.length();946int chan = dst.meta().chan;947int length = width * chan;948949for (int l=0; l < length; l++)950out[l] = lut.data[ in[l] ];951}952};953954//-------------------------955//956// Fluid kernels: convertTo957//958//-------------------------959960template<typename DST, typename SRC>961static void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta)962{963const auto *in = src.InLine<SRC>(0);964auto *out = dst.OutLine<DST>();965966int width = dst.length();967int chan = dst.meta().chan;968int length = width * chan;969970// NB: don't do this if SRC or DST is 64-bit971auto alpha = static_cast<float>( _alpha );972auto beta = static_cast<float>( _beta );973974// compute faster if no alpha no beta975if (alpha == 1 && beta == 0)976{977// manual SIMD if need rounding978if (std::is_integral<DST>::value && std::is_floating_point<SRC>::value)979{980GAPI_Assert(( std::is_same<SRC,float>::value ));981982int l = 0; // cycle index983984#if CV_SIMD128985if (std::is_same<DST,uchar>::value)986{987for (; l <= length-16; l+=16)988{989v_int32x4 i0, i1, i2, i3;990i0 = v_round( v_load( (float*)& in[l ] ) );991i1 = v_round( v_load( (float*)& in[l + 4] ) );992i2 = v_round( v_load( (float*)& in[l + 8] ) );993i3 = v_round( v_load( (float*)& in[l + 12] ) );994995v_uint16x8 us0, us1;996us0 = v_pack_u(i0, i1);997us1 = v_pack_u(i2, i3);998999v_uint8x16 uc;1000uc = v_pack(us0, us1);1001v_store((uchar*)& out[l], uc);1002}1003}1004if (std::is_same<DST,ushort>::value)1005{1006for (; l <= length-8; l+=8)1007{1008v_int32x4 i0, i1;1009i0 = v_round( v_load( (float*)& in[l ] ) );1010i1 = v_round( v_load( (float*)& in[l + 4] ) );10111012v_uint16x8 us;1013us = v_pack_u(i0, i1);1014v_store((ushort*)& out[l], us);1015}1016}1017#endif10181019// tail of SIMD cycle1020for (; l < length; l++)1021{1022out[l] = saturate<DST>(in[l], rintf);1023}1024}1025else if (std::is_integral<DST>::value) // here SRC is integral1026{1027for (int l=0; l < length; l++)1028{1029out[l] = saturate<DST>(in[l]);1030}1031}1032else // DST is floating-point, SRC is any1033{1034for (int l=0; l < length; l++)1035{1036out[l] = static_cast<DST>(in[l]);1037}1038}1039}1040else // if alpha or beta is non-trivial1041{1042// TODO: optimize if alpha and beta and data are integral1043for (int l=0; l < length; l++)1044{1045out[l] = saturate<DST>(in[l]*alpha + beta, rintf);1046}1047}1048}10491050GAPI_FLUID_KERNEL(GFluidConvertTo, cv::gapi::core::GConvertTo, false)1051{1052static const int Window = 1;10531054static void run(const View &src, int /*rtype*/, double alpha, double beta, Buffer &dst)1055{1056// DST SRC OP __VA_ARGS__1057UNARY_(uchar , uchar , run_convertto, dst, src, alpha, beta);1058UNARY_(uchar , ushort, run_convertto, dst, src, alpha, beta);1059UNARY_(uchar , float, run_convertto, dst, src, alpha, beta);1060UNARY_(ushort, uchar , run_convertto, dst, src, alpha, beta);1061UNARY_(ushort, ushort, run_convertto, dst, src, alpha, beta);1062UNARY_(ushort, float, run_convertto, dst, src, alpha, beta);1063UNARY_( float, uchar , run_convertto, dst, src, alpha, beta);1064UNARY_( float, ushort, run_convertto, dst, src, alpha, beta);1065UNARY_( float, float, run_convertto, dst, src, alpha, beta);10661067CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1068}1069};10701071//-----------------------------1072//1073// Fluid math kernels: min, max1074//1075//-----------------------------10761077enum Minmax { MM_MIN, MM_MAX };10781079template<typename DST, typename SRC1, typename SRC2>1080static void run_minmax(Buffer &dst, const View &src1, const View &src2, Minmax minmax)1081{1082static_assert(std::is_same<DST, SRC1>::value, "wrong types");1083static_assert(std::is_same<DST, SRC2>::value, "wrong types");10841085const auto *in1 = src1.InLine<SRC1>(0);1086const auto *in2 = src2.InLine<SRC2>(0);1087auto *out = dst.OutLine<DST>();10881089int width = dst.length();1090int chan = dst.meta().chan;10911092int length = width * chan;10931094switch (minmax)1095{1096case MM_MIN:1097for (int l=0; l < length; l++)1098out[l] = in1[l] < in2[l]? in1[l]: in2[l];1099break;1100case MM_MAX:1101for (int l=0; l < length; l++)1102out[l] = in1[l] > in2[l]? in1[l]: in2[l];1103break;1104default: CV_Error(cv::Error::StsBadArg, "unsupported min/max operation");1105}1106}11071108GAPI_FLUID_KERNEL(GFluidMin, cv::gapi::core::GMin, false)1109{1110static const int Window = 1;11111112static void run(const View &src1, const View &src2, Buffer &dst)1113{1114// DST SRC1 SRC2 OP __VA_ARGS__1115BINARY_(uchar , uchar , uchar , run_minmax, dst, src1, src2, MM_MIN);1116BINARY_(ushort, ushort, ushort, run_minmax, dst, src1, src2, MM_MIN);1117BINARY_( short, short, short, run_minmax, dst, src1, src2, MM_MIN);1118BINARY_( float, float, float, run_minmax, dst, src1, src2, MM_MIN);11191120CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1121}1122};11231124GAPI_FLUID_KERNEL(GFluidMax, cv::gapi::core::GMax, false)1125{1126static const int Window = 1;11271128static void run(const View &src1, const View &src2, Buffer &dst)1129{1130// DST SRC1 SRC2 OP __VA_ARGS__1131BINARY_(uchar , uchar , uchar , run_minmax, dst, src1, src2, MM_MAX);1132BINARY_(ushort, ushort, ushort, run_minmax, dst, src1, src2, MM_MAX);1133BINARY_( short, short, short, run_minmax, dst, src1, src2, MM_MAX);1134BINARY_( float, float, float, run_minmax, dst, src1, src2, MM_MAX);11351136CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1137}1138};11391140//-----------------------1141//1142// Fluid kernels: compare1143//1144//-----------------------11451146enum Compare { CMP_EQ, CMP_NE, CMP_GE, CMP_GT, CMP_LE, CMP_LT };11471148template<typename DST, typename SRC1, typename SRC2>1149static void run_cmp(Buffer &dst, const View &src1, const View &src2, Compare compare)1150{1151static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");1152static_assert(std::is_same<DST, uchar>::value, "wrong types");11531154const auto *in1 = src1.InLine<SRC1>(0);1155const auto *in2 = src2.InLine<SRC2>(0);1156auto *out = dst.OutLine<DST>();11571158int width = dst.length();1159int chan = dst.meta().chan;11601161int length = width * chan;11621163switch (compare)1164{1165case CMP_EQ:1166for (int l=0; l < length; l++)1167out[l] = in1[l] == in2[l]? 255: 0;1168break;1169case CMP_NE:1170for (int l=0; l < length; l++)1171out[l] = in1[l] != in2[l]? 255: 0;1172break;1173case CMP_GE:1174for (int l=0; l < length; l++)1175out[l] = in1[l] >= in2[l]? 255: 0;1176break;1177case CMP_LE:1178for (int l=0; l < length; l++)1179out[l] = in1[l] <= in2[l]? 255: 0;1180break;1181case CMP_GT:1182for (int l=0; l < length; l++)1183out[l] = in1[l] > in2[l]? 255: 0;1184break;1185case CMP_LT:1186for (int l=0; l < length; l++)1187out[l] = in1[l] < in2[l]? 255: 0;1188break;1189default:1190CV_Error(cv::Error::StsBadArg, "unsupported compare operation");1191}1192}11931194GAPI_FLUID_KERNEL(GFluidCmpEQ, cv::gapi::core::GCmpEQ, false)1195{1196static const int Window = 1;11971198static void run(const View &src1, const View &src2, Buffer &dst)1199{1200// DST SRC1 SRC2 OP __VA_ARGS__1201BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_EQ);1202BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_EQ);1203BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_EQ);12041205CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1206}1207};12081209GAPI_FLUID_KERNEL(GFluidCmpNE, cv::gapi::core::GCmpNE, false)1210{1211static const int Window = 1;12121213static void run(const View &src1, const View &src2, Buffer &dst)1214{1215// DST SRC1 SRC2 OP __VA_ARGS__1216BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_NE);1217BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_NE);1218BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_NE);12191220CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1221}1222};12231224GAPI_FLUID_KERNEL(GFluidCmpGE, cv::gapi::core::GCmpGE, false)1225{1226static const int Window = 1;12271228static void run(const View &src1, const View &src2, Buffer &dst)1229{1230// DST SRC1 SRC2 OP __VA_ARGS__1231BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_GE);1232BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_GE);1233BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_GE);12341235CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1236}1237};12381239GAPI_FLUID_KERNEL(GFluidCmpGT, cv::gapi::core::GCmpGT, false)1240{1241static const int Window = 1;12421243static void run(const View &src1, const View &src2, Buffer &dst)1244{1245// DST SRC1 SRC2 OP __VA_ARGS__1246BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_GT);1247BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_GT);1248BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_GT);12491250CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1251}1252};12531254GAPI_FLUID_KERNEL(GFluidCmpLE, cv::gapi::core::GCmpLE, false)1255{1256static const int Window = 1;12571258static void run(const View &src1, const View &src2, Buffer &dst)1259{1260// DST SRC1 SRC2 OP __VA_ARGS__1261BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_LE);1262BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_LE);1263BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_LE);12641265CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1266}1267};12681269GAPI_FLUID_KERNEL(GFluidCmpLT, cv::gapi::core::GCmpLT, false)1270{1271static const int Window = 1;12721273static void run(const View &src1, const View &src2, Buffer &dst)1274{1275// DST SRC1 SRC2 OP __VA_ARGS__1276BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_LT);1277BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_LT);1278BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_LT);12791280CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1281}1282};12831284//---------------------1285//1286// Compare with GScalar1287//1288//---------------------12891290template<typename DST, typename SRC, typename SCALAR=double>1291static void run_cmp(DST out[], const SRC in[], int length, Compare compare, SCALAR s)1292{1293switch (compare)1294{1295case CMP_EQ:1296for (int l=0; l < length; l++)1297out[l] = in[l] == s? 255: 0;1298break;1299case CMP_NE:1300for (int l=0; l < length; l++)1301out[l] = in[l] != s? 255: 0;1302break;1303case CMP_GE:1304for (int l=0; l < length; l++)1305out[l] = in[l] >= s? 255: 0;1306break;1307case CMP_LE:1308for (int l=0; l < length; l++)1309out[l] = in[l] <= s? 255: 0;1310break;1311case CMP_GT:1312for (int l=0; l < length; l++)1313out[l] = in[l] > s? 255: 0;1314break;1315case CMP_LT:1316for (int l=0; l < length; l++)1317out[l] = in[l] < s? 255: 0;1318break;1319default:1320CV_Error(cv::Error::StsBadArg, "unsupported compare operation");1321}1322}13231324template<typename DST, typename SRC>1325static void run_cmp(Buffer &dst, const View &src, Compare compare, const cv::Scalar &scalar)1326{1327static_assert(std::is_same<DST, uchar>::value, "wrong types");13281329const auto *in = src.InLine<SRC>(0);1330auto *out = dst.OutLine<DST>();13311332int width = dst.length();1333int chan = dst.meta().chan;13341335int length = width * chan;13361337// compute faster if scalar rounds to SRC1338double d = scalar[0] ;1339SRC s = static_cast<SRC>( scalar[0] );13401341if (s == d)1342run_cmp(out, in, length, compare, s);1343else1344run_cmp(out, in, length, compare, d);1345}13461347GAPI_FLUID_KERNEL(GFluidCmpEQScalar, cv::gapi::core::GCmpEQScalar, false)1348{1349static const int Window = 1;13501351static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)1352{1353// DST SRC OP __VA_ARGS__1354UNARY_(uchar, uchar , run_cmp, dst, src, CMP_EQ, scalar);1355UNARY_(uchar, short, run_cmp, dst, src, CMP_EQ, scalar);1356UNARY_(uchar, float, run_cmp, dst, src, CMP_EQ, scalar);13571358CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1359}1360};13611362GAPI_FLUID_KERNEL(GFluidCmpNEScalar, cv::gapi::core::GCmpNEScalar, false)1363{1364static const int Window = 1;13651366static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)1367{1368// DST SRC OP __VA_ARGS__1369UNARY_(uchar, uchar , run_cmp, dst, src, CMP_NE, scalar);1370UNARY_(uchar, short, run_cmp, dst, src, CMP_NE, scalar);1371UNARY_(uchar, float, run_cmp, dst, src, CMP_NE, scalar);13721373CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1374}1375};13761377GAPI_FLUID_KERNEL(GFluidCmpGEScalar, cv::gapi::core::GCmpGEScalar, false)1378{1379static const int Window = 1;13801381static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)1382{1383// DST SRC OP __VA_ARGS__1384UNARY_(uchar, uchar , run_cmp, dst, src, CMP_GE, scalar);1385UNARY_(uchar, short, run_cmp, dst, src, CMP_GE, scalar);1386UNARY_(uchar, float, run_cmp, dst, src, CMP_GE, scalar);13871388CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1389}1390};13911392GAPI_FLUID_KERNEL(GFluidCmpGTScalar, cv::gapi::core::GCmpGTScalar, false)1393{1394static const int Window = 1;13951396static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)1397{1398// DST SRC OP __VA_ARGS__1399UNARY_(uchar, uchar , run_cmp, dst, src, CMP_GT, scalar);1400UNARY_(uchar, short, run_cmp, dst, src, CMP_GT, scalar);1401UNARY_(uchar, float, run_cmp, dst, src, CMP_GT, scalar);14021403CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1404}1405};14061407GAPI_FLUID_KERNEL(GFluidCmpLEScalar, cv::gapi::core::GCmpLEScalar, false)1408{1409static const int Window = 1;14101411static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)1412{1413// DST SRC OP __VA_ARGS__1414UNARY_(uchar, uchar , run_cmp, dst, src, CMP_LE, scalar);1415UNARY_(uchar, short, run_cmp, dst, src, CMP_LE, scalar);1416UNARY_(uchar, float, run_cmp, dst, src, CMP_LE, scalar);14171418CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1419}1420};14211422GAPI_FLUID_KERNEL(GFluidCmpLTScalar, cv::gapi::core::GCmpLTScalar, false)1423{1424static const int Window = 1;14251426static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)1427{1428// DST SRC OP __VA_ARGS__1429UNARY_(uchar, uchar , run_cmp, dst, src, CMP_LT, scalar);1430UNARY_(uchar, short, run_cmp, dst, src, CMP_LT, scalar);1431UNARY_(uchar, float, run_cmp, dst, src, CMP_LT, scalar);14321433CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1434}1435};14361437//-------------------------1438//1439// Fluid kernels: threshold1440//1441//-------------------------14421443template<typename DST, typename SRC>1444static void run_threshold(Buffer &dst, const View &src, const cv::Scalar &thresh,1445const cv::Scalar &maxval,1446int type)1447{1448static_assert(std::is_same<DST, SRC>::value, "wrong types");14491450const auto *in = src.InLine<SRC>(0);1451auto *out = dst.OutLine<DST>();14521453int width = dst.length();1454int chan = dst.meta().chan;14551456int length = width * chan;14571458DST thresh_ = saturate<DST>(thresh[0], floord);1459DST threshd = saturate<DST>(thresh[0], roundd);1460DST maxvald = saturate<DST>(maxval[0], roundd);14611462switch (type)1463{1464case cv::THRESH_BINARY:1465for (int l=0; l < length; l++)1466out[l] = in[l] > thresh_? maxvald: 0;1467break;1468case cv::THRESH_BINARY_INV:1469for (int l=0; l < length; l++)1470out[l] = in[l] > thresh_? 0: maxvald;1471break;1472case cv::THRESH_TRUNC:1473for (int l=0; l < length; l++)1474out[l] = in[l] > thresh_? threshd: in[l];1475break;1476case cv::THRESH_TOZERO:1477for (int l=0; l < length; l++)1478out[l] = in[l] > thresh_? in[l]: 0;1479break;1480case cv::THRESH_TOZERO_INV:1481for (int l=0; l < length; l++)1482out[l] = in[l] > thresh_? 0: in[l];1483break;1484default: CV_Error(cv::Error::StsBadArg, "unsupported threshold type");1485}1486}14871488GAPI_FLUID_KERNEL(GFluidThreshold, cv::gapi::core::GThreshold, false)1489{1490static const int Window = 1;14911492static void run(const View &src, const cv::Scalar &thresh,1493const cv::Scalar &maxval,1494int type,1495Buffer &dst)1496{1497// DST SRC OP __VA_ARGS__1498UNARY_(uchar , uchar , run_threshold, dst, src, thresh, maxval, type);1499UNARY_(ushort, ushort, run_threshold, dst, src, thresh, maxval, type);1500UNARY_( short, short, run_threshold, dst, src, thresh, maxval, type);15011502CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1503}1504};15051506//------------------------1507//1508// Fluid kernels: in-range1509//1510//------------------------15111512static void run_inrange3(uchar out[], const uchar in[], int width,1513const uchar lower[], const uchar upper[])1514{1515int w = 0; // cycle index15161517#if CV_SIMD1281518for (; w <= width-16; w+=16)1519{1520v_uint8x16 i0, i1, i2;1521v_load_deinterleave(&in[3*w], i0, i1, i2);15221523v_uint8x16 o;1524o = (i0 >= v_setall_u8(lower[0])) & (i0 <= v_setall_u8(upper[0])) &1525(i1 >= v_setall_u8(lower[1])) & (i1 <= v_setall_u8(upper[1])) &1526(i2 >= v_setall_u8(lower[2])) & (i2 <= v_setall_u8(upper[2]));15271528v_store(&out[w], o);1529}1530#endif15311532for (; w < width; w++)1533{1534out[w] = in[3*w ] >= lower[0] && in[3*w ] <= upper[0] &&1535in[3*w+1] >= lower[1] && in[3*w+1] <= upper[1] &&1536in[3*w+2] >= lower[2] && in[3*w+2] <= upper[2] ? 255: 0;1537}1538}15391540template<typename DST, typename SRC>1541static void run_inrange(Buffer &dst, const View &src, const cv::Scalar &upperb,1542const cv::Scalar &lowerb)1543{1544static_assert(std::is_same<DST, uchar>::value, "wrong types");1545static_assert(std::is_integral<SRC>::value, "wrong types");15461547const auto *in = src.InLine<SRC>(0);1548auto *out = dst.OutLine<DST>();15491550int width = src.length();1551int chan = src.meta().chan;1552GAPI_Assert(dst.meta().chan == 1);15531554// for integral input, in[i] >= lower equals in[i] >= ceil(lower)1555// so we can optimize compare operations by rounding lower/upper1556SRC lower[4], upper[4];1557for (int c=0; c < chan; c++)1558{1559lower[c] = saturate<SRC>(lowerb[c], ceild);1560upper[c] = saturate<SRC>(upperb[c], floord);1561}15621563// manually SIMD for important case if RGB/BGR1564if (std::is_same<SRC,uchar>::value && chan==3)1565{1566run_inrange3((uchar*)out, (const uchar*)in, width,1567(const uchar*)lower, (const uchar*)upper);1568return;1569}15701571// TODO: please manually SIMD if multiple channels:1572// modern compilers would perfectly vectorize this code if one channel,1573// but may need help with de-interleaving channels if RGB/BGR image etc1574switch (chan)1575{1576case 1:1577for (int w=0; w < width; w++)1578out[w] = in[w] >= lower[0] && in[w] <= upper[0]? 255: 0;1579break;1580case 2:1581for (int w=0; w < width; w++)1582out[w] = in[2*w ] >= lower[0] && in[2*w ] <= upper[0] &&1583in[2*w+1] >= lower[1] && in[2*w+1] <= upper[1] ? 255: 0;1584break;1585case 3:1586for (int w=0; w < width; w++)1587out[w] = in[3*w ] >= lower[0] && in[3*w ] <= upper[0] &&1588in[3*w+1] >= lower[1] && in[3*w+1] <= upper[1] &&1589in[3*w+2] >= lower[2] && in[3*w+2] <= upper[2] ? 255: 0;1590break;1591case 4:1592for (int w=0; w < width; w++)1593out[w] = in[4*w ] >= lower[0] && in[4*w ] <= upper[0] &&1594in[4*w+1] >= lower[1] && in[4*w+1] <= upper[1] &&1595in[4*w+2] >= lower[2] && in[4*w+2] <= upper[2] &&1596in[4*w+3] >= lower[3] && in[4*w+3] <= upper[3] ? 255: 0;1597break;1598default: CV_Error(cv::Error::StsBadArg, "unsupported number of channels");1599}1600}16011602GAPI_FLUID_KERNEL(GFluidInRange, cv::gapi::core::GInRange, false)1603{1604static const int Window = 1;16051606static void run(const View &src, const cv::Scalar &lowerb, const cv::Scalar& upperb,1607Buffer &dst)1608{1609// DST SRC OP __VA_ARGS__1610INRANGE_(uchar, uchar , run_inrange, dst, src, upperb, lowerb);1611INRANGE_(uchar, ushort, run_inrange, dst, src, upperb, lowerb);1612INRANGE_(uchar, short, run_inrange, dst, src, upperb, lowerb);16131614CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1615}1616};16171618//----------------------1619//1620// Fluid kernels: select1621//1622//----------------------16231624// manually vectored function for important case if RGB/BGR image1625static void run_select_row3(int width, uchar out[], uchar in1[], uchar in2[], uchar in3[])1626{1627int w = 0; // cycle index16281629#if CV_SIMD1281630for (; w <= width-16; w+=16)1631{1632v_uint8x16 a1, b1, c1;1633v_uint8x16 a2, b2, c2;1634v_uint8x16 mask;1635v_uint8x16 a, b, c;16361637v_load_deinterleave(&in1[3*w], a1, b1, c1);1638v_load_deinterleave(&in2[3*w], a2, b2, c2);16391640mask = v_load(&in3[w]);1641mask = mask != v_setzero_u8();16421643a = v_select(mask, a1, a2);1644b = v_select(mask, b1, b2);1645c = v_select(mask, c1, c2);16461647v_store_interleave(&out[3*w], a, b, c);1648}1649#endif16501651for (; w < width; w++)1652{1653out[3*w ] = in3[w]? in1[3*w ]: in2[3*w ];1654out[3*w + 1] = in3[w]? in1[3*w + 1]: in2[3*w + 1];1655out[3*w + 2] = in3[w]? in1[3*w + 2]: in2[3*w + 2];1656}1657}16581659// parameter chan is compile-time known constant, normally chan=1..41660template<int chan, typename DST, typename SRC1, typename SRC2, typename SRC3>1661static void run_select_row(int width, DST out[], SRC1 in1[], SRC2 in2[], SRC3 in3[])1662{1663if (std::is_same<DST,uchar>::value && chan==3)1664{1665// manually vectored function for important case if RGB/BGR image1666run_select_row3(width, (uchar*)out, (uchar*)in1, (uchar*)in2, (uchar*)in3);1667return;1668}16691670// because `chan` is template parameter, its value is known at compilation time,1671// so that modern compilers would efficiently vectorize this cycle if chan==11672// (if chan>1, compilers may need help with de-interleaving of the channels)1673for (int w=0; w < width; w++)1674{1675for (int c=0; c < chan; c++)1676{1677out[w*chan + c] = in3[w]? in1[w*chan + c]: in2[w*chan + c];1678}1679}1680}16811682template<typename DST, typename SRC1, typename SRC2, typename SRC3>1683static void run_select(Buffer &dst, const View &src1, const View &src2, const View &src3)1684{1685static_assert(std::is_same<DST , SRC1>::value, "wrong types");1686static_assert(std::is_same<DST , SRC2>::value, "wrong types");1687static_assert(std::is_same<uchar, SRC3>::value, "wrong types");16881689auto *out = dst.OutLine<DST>();16901691const auto *in1 = src1.InLine<SRC1>(0);1692const auto *in2 = src2.InLine<SRC2>(0);1693const auto *in3 = src3.InLine<SRC3>(0);16941695int width = dst.length();1696int chan = dst.meta().chan;16971698switch (chan)1699{1700case 1: run_select_row<1>(width, out, in1, in2, in3); break;1701case 2: run_select_row<2>(width, out, in1, in2, in3); break;1702case 3: run_select_row<3>(width, out, in1, in2, in3); break;1703case 4: run_select_row<4>(width, out, in1, in2, in3); break;1704default: CV_Error(cv::Error::StsBadArg, "unsupported number of channels");1705}1706}17071708GAPI_FLUID_KERNEL(GFluidSelect, cv::gapi::core::GSelect, false)1709{1710static const int Window = 1;17111712static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)1713{1714// DST SRC1 SRC2 SRC3 OP __VA_ARGS__1715SELECT_(uchar , uchar , uchar , uchar, run_select, dst, src1, src2, src3);1716SELECT_(ushort, ushort, ushort, uchar, run_select, dst, src1, src2, src3);1717SELECT_( short, short, short, uchar, run_select, dst, src1, src2, src3);17181719CV_Error(cv::Error::StsBadArg, "unsupported combination of types");1720}1721};17221723//----------------------------------------------------1724//1725// Fluid kernels: split, merge, polat2cart, cart2polar1726//1727//----------------------------------------------------17281729GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)1730{1731static const int Window = 1;17321733static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)1734{1735const auto *in = src.InLine<uchar>(0);1736auto *out1 = dst1.OutLine<uchar>();1737auto *out2 = dst2.OutLine<uchar>();1738auto *out3 = dst3.OutLine<uchar>();17391740GAPI_Assert(3 == src.meta().chan);1741int width = src.length();17421743int w = 0; // cycle counter17441745#if CV_SIMD1281746for (; w <= width-16; w+=16)1747{1748v_uint8x16 a, b, c;1749v_load_deinterleave(&in[3*w], a, b, c);1750v_store(&out1[w], a);1751v_store(&out2[w], b);1752v_store(&out3[w], c);1753}1754#endif17551756for (; w < width; w++)1757{1758out1[w] = in[3*w ];1759out2[w] = in[3*w + 1];1760out3[w] = in[3*w + 2];1761}1762}1763};17641765GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)1766{1767static const int Window = 1;17681769static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)1770{1771const auto *in = src.InLine<uchar>(0);1772auto *out1 = dst1.OutLine<uchar>();1773auto *out2 = dst2.OutLine<uchar>();1774auto *out3 = dst3.OutLine<uchar>();1775auto *out4 = dst4.OutLine<uchar>();17761777GAPI_Assert(4 == src.meta().chan);1778int width = src.length();17791780int w = 0; // cycle counter17811782#if CV_SIMD1281783for (; w <= width-16; w+=16)1784{1785v_uint8x16 a, b, c, d;1786v_load_deinterleave(&in[4*w], a, b, c, d);1787v_store(&out1[w], a);1788v_store(&out2[w], b);1789v_store(&out3[w], c);1790v_store(&out4[w], d);1791}1792#endif17931794for (; w < width; w++)1795{1796out1[w] = in[4*w ];1797out2[w] = in[4*w + 1];1798out3[w] = in[4*w + 2];1799out4[w] = in[4*w + 3];1800}1801}1802};18031804GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false)1805{1806static const int Window = 1;18071808static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)1809{1810const auto *in1 = src1.InLine<uchar>(0);1811const auto *in2 = src2.InLine<uchar>(0);1812const auto *in3 = src3.InLine<uchar>(0);1813auto *out = dst.OutLine<uchar>();18141815GAPI_Assert(3 == dst.meta().chan);1816int width = dst.length();18171818int w = 0; // cycle counter18191820#if CV_SIMD1281821for (; w <= width-16; w+=16)1822{1823v_uint8x16 a, b, c;1824a = v_load(&in1[w]);1825b = v_load(&in2[w]);1826c = v_load(&in3[w]);1827v_store_interleave(&out[3*w], a, b, c);1828}1829#endif18301831for (; w < width; w++)1832{1833out[3*w ] = in1[w];1834out[3*w + 1] = in2[w];1835out[3*w + 2] = in3[w];1836}1837}1838};18391840GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)1841{1842static const int Window = 1;18431844static void run(const View &src1, const View &src2, const View &src3, const View &src4,1845Buffer &dst)1846{1847const auto *in1 = src1.InLine<uchar>(0);1848const auto *in2 = src2.InLine<uchar>(0);1849const auto *in3 = src3.InLine<uchar>(0);1850const auto *in4 = src4.InLine<uchar>(0);1851auto *out = dst.OutLine<uchar>();18521853GAPI_Assert(4 == dst.meta().chan);1854int width = dst.length();18551856int w = 0; // cycle counter18571858#if CV_SIMD1281859for (; w <= width-16; w+=16)1860{1861v_uint8x16 a, b, c, d;1862a = v_load(&in1[w]);1863b = v_load(&in2[w]);1864c = v_load(&in3[w]);1865d = v_load(&in4[w]);1866v_store_interleave(&out[4*w], a, b, c, d);1867}1868#endif18691870for (; w < width; w++)1871{1872out[4*w ] = in1[w];1873out[4*w + 1] = in2[w];1874out[4*w + 2] = in3[w];1875out[4*w + 3] = in4[w];1876}1877}1878};18791880GAPI_FLUID_KERNEL(GFluidPolarToCart, cv::gapi::core::GPolarToCart, false)1881{1882static const int Window = 1;18831884static void run(const View &src1, const View &src2, bool angleInDegrees,1885Buffer &dst1, Buffer &dst2)1886{1887GAPI_Assert(src1.meta().depth == CV_32F);1888GAPI_Assert(src2.meta().depth == CV_32F);1889GAPI_Assert(dst1.meta().depth == CV_32F);1890GAPI_Assert(dst2.meta().depth == CV_32F);18911892const auto * in1 = src1.InLine<float>(0);1893const auto * in2 = src2.InLine<float>(0);1894auto *out1 = dst1.OutLine<float>();1895auto *out2 = dst2.OutLine<float>();18961897int width = src1.length();1898int chan = src2.meta().chan;1899int length = width * chan;19001901// SIMD: compiler vectoring!1902for (int l=0; l < length; l++)1903{1904float angle = angleInDegrees?1905in2[l] * static_cast<float>(CV_PI / 180):1906in2[l];1907float magnitude = in1[l];1908float x = magnitude * std::cos(angle);1909float y = magnitude * std::sin(angle);1910out1[l] = x;1911out2[l] = y;1912}1913}1914};19151916GAPI_FLUID_KERNEL(GFluidCartToPolar, cv::gapi::core::GCartToPolar, false)1917{1918static const int Window = 1;19191920static void run(const View &src1, const View &src2, bool angleInDegrees,1921Buffer &dst1, Buffer &dst2)1922{1923GAPI_Assert(src1.meta().depth == CV_32F);1924GAPI_Assert(src2.meta().depth == CV_32F);1925GAPI_Assert(dst1.meta().depth == CV_32F);1926GAPI_Assert(dst2.meta().depth == CV_32F);19271928const auto * in1 = src1.InLine<float>(0);1929const auto * in2 = src2.InLine<float>(0);1930auto *out1 = dst1.OutLine<float>();1931auto *out2 = dst2.OutLine<float>();19321933int width = src1.length();1934int chan = src2.meta().chan;1935int length = width * chan;19361937// SIMD: compiler vectoring!1938for (int l=0; l < length; l++)1939{1940float x = in1[l];1941float y = in2[l];1942float magnitude = std::hypot(y, x);1943float angle_rad = std::atan2(y, x);1944float angle = angleInDegrees?1945angle_rad * static_cast<float>(180 / CV_PI):1946angle_rad;1947out1[l] = magnitude;1948out2[l] = angle;1949}1950}1951};19521953GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::core::GResize, true)1954{1955static const int Window = 1;1956static const auto Kind = GFluidKernel::Kind::Resize;19571958constexpr static const int INTER_RESIZE_COEF_BITS = 11;1959constexpr static const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS;1960constexpr static const short ONE = INTER_RESIZE_COEF_SCALE;19611962struct ResizeUnit1963{1964short alpha0;1965short alpha1;1966int s0;1967int s1;1968};19691970static ResizeUnit map(double ratio, int start, int max, int outCoord)1971{1972float f = static_cast<float>((outCoord + 0.5f) * ratio - 0.5f);1973int s = cvFloor(f);1974f -= s;19751976ResizeUnit ru;19771978ru.s0 = std::max(s - start, 0);1979ru.s1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;19801981ru.alpha0 = saturate_cast<short>((1.0f - f) * INTER_RESIZE_COEF_SCALE);1982ru.alpha1 = saturate_cast<short>((f) * INTER_RESIZE_COEF_SCALE);19831984return ru;1985}19861987static void initScratch(const cv::GMatDesc& in,1988cv::Size outSz, double /*fx*/, double /*fy*/, int /*interp*/,1989cv::gapi::fluid::Buffer &scratch)1990{1991CV_Assert(in.depth == CV_8U && in.chan == 3);19921993cv::Size scratch_size{static_cast<int>(outSz.width * sizeof(ResizeUnit)), 1};19941995cv::GMatDesc desc;1996desc.chan = 1;1997desc.depth = CV_8UC1;1998desc.size = to_own(scratch_size);19992000cv::gapi::fluid::Buffer buffer(desc);2001scratch = std::move(buffer);20022003ResizeUnit* mapX = scratch.OutLine<ResizeUnit>();2004double hRatio = (double)in.size.width / outSz.width;20052006for (int x = 0, w = outSz.width; x < w; x++)2007{2008mapX[x] = map(hRatio, 0, in.size.width, x);2009}2010}20112012static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/)2013{}20142015static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, double /*fy*/, int /*interp*/,2016cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch)2017{2018double vRatio = (double)in.meta().size.height / out.meta().size.height;2019auto mapY = map(vRatio, in.y(), in.meta().size.height, out.y());20202021auto beta0 = mapY.alpha0;2022auto beta1 = mapY.alpha1;20232024const auto src0 = in.InLine <unsigned char>(mapY.s0);2025const auto src1 = in.InLine <unsigned char>(mapY.s1);20262027auto dst = out.OutLine<unsigned char>();20282029ResizeUnit* mapX = scratch.OutLine<ResizeUnit>();20302031for (int x = 0; x < out.length(); x++)2032{2033short alpha0 = mapX[x].alpha0;2034short alpha1 = mapX[x].alpha1;2035int sx0 = mapX[x].s0;2036int sx1 = mapX[x].s1;20372038int res00 = src0[3*sx0 ]*alpha0 + src0[3*(sx1) ]*alpha1;2039int res10 = src1[3*sx0 ]*alpha0 + src1[3*(sx1) ]*alpha1;20402041int res01 = src0[3*sx0 + 1]*alpha0 + src0[3*(sx1) + 1]*alpha1;2042int res11 = src1[3*sx0 + 1]*alpha0 + src1[3*(sx1) + 1]*alpha1;20432044int res02 = src0[3*sx0 + 2]*alpha0 + src0[3*(sx1) + 2]*alpha1;2045int res12 = src1[3*sx0 + 2]*alpha0 + src1[3*(sx1) + 2]*alpha1;20462047dst[3*x ] = uchar(( ((beta0 * (res00 >> 4)) >> 16) + ((beta1 * (res10 >> 4)) >> 16) + 2)>>2);2048dst[3*x + 1] = uchar(( ((beta0 * (res01 >> 4)) >> 16) + ((beta1 * (res11 >> 4)) >> 16) + 2)>>2);2049dst[3*x + 2] = uchar(( ((beta0 * (res02 >> 4)) >> 16) + ((beta1 * (res12 >> 4)) >> 16) + 2)>>2);2050}2051}2052};20532054} // namespace fliud2055} // namespace gapi2056} // namespace cv20572058cv::gapi::GKernelPackage cv::gapi::core::fluid::kernels()2059{2060using namespace cv::gapi::fluid;20612062return cv::gapi::kernels2063< GFluidAdd2064,GFluidSub2065,GFluidMul2066,GFluidDiv2067,GFluidAbsDiff2068,GFluidAnd2069,GFluidOr2070,GFluidXor2071,GFluidMin2072,GFluidMax2073,GFluidCmpGT2074,GFluidCmpGE2075,GFluidCmpLE2076,GFluidCmpLT2077,GFluidCmpEQ2078,GFluidCmpNE2079,GFluidAddW2080,GFluidNot2081,GFluidLUT2082,GFluidConvertTo2083,GFluidSplit32084,GFluidSplit42085,GFluidMerge32086,GFluidMerge42087,GFluidSelect2088,GFluidPolarToCart2089,GFluidCartToPolar2090,GFluidAddC2091,GFluidSubC2092,GFluidSubRC2093,GFluidMulC2094,GFluidMulCOld2095,GFluidDivC2096,GFluidDivRC2097,GFluidAbsDiffC2098,GFluidCmpGTScalar2099,GFluidCmpGEScalar2100,GFluidCmpLEScalar2101,GFluidCmpLTScalar2102,GFluidCmpEQScalar2103,GFluidCmpNEScalar2104,GFluidThreshold2105,GFluidInRange2106,GFluidResize2107#if 02108,GFluidMean -- not fluid2109,GFluidSum -- not fluid2110,GFluidNormL1 -- not fluid2111,GFluidNormL2 -- not fluid2112,GFluidNormInf -- not fluid2113,GFluidIntegral -- not fluid2114,GFluidThresholdOT -- not fluid2115,GFluidResize -- not fluid (?)2116,GFluidRemap -- not fluid2117,GFluidFlip -- not fluid2118,GFluidCrop -- not fluid2119,GFluidConcatHor2120,GFluidConcatVert -- not fluid2121#endif2122>();2123}21242125#endif // !defined(GAPI_STANDALONE)212621272128