Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/gapi/src/backends/fluid/gfluidcore.cpp
16345 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
4
//
5
// Copyright (C) 2018 Intel Corporation
6
7
#if !defined(GAPI_STANDALONE)
8
9
#include "precomp.hpp"
10
11
#include "opencv2/gapi/own/assert.hpp"
12
#include "opencv2/core/traits.hpp"
13
#include "opencv2/core/hal/intrin.hpp"
14
15
#include "opencv2/gapi/core.hpp"
16
17
#include "opencv2/gapi/fluid/gfluidbuffer.hpp"
18
#include "opencv2/gapi/fluid/gfluidkernel.hpp"
19
20
#include "gfluidbuffer_priv.hpp"
21
#include "gfluidbackend.hpp"
22
#include "gfluidutils.hpp"
23
#include "gfluidcore.hpp"
24
25
#include <cassert>
26
#include <cmath>
27
#include <cstdlib>
28
29
namespace cv {
30
namespace gapi {
31
namespace fluid {
32
33
//---------------------
34
//
35
// Arithmetic functions
36
//
37
//---------------------
38
39
template<typename DST, typename SRC1, typename SRC2>
40
static inline DST absdiff(SRC1 x, SRC2 y)
41
{
42
auto result = x > y? x - y: y - x;
43
return saturate<DST>(result, roundf);
44
}
45
46
template<typename DST, typename SRC1, typename SRC2>
47
static inline DST addWeighted(SRC1 src1, SRC2 src2, float alpha, float beta, float gamma)
48
{
49
float dst = src1*alpha + src2*beta + gamma;
50
return saturate<DST>(dst, roundf);
51
}
52
53
template<typename DST, typename SRC1, typename SRC2>
54
static inline DST add(SRC1 x, SRC2 y)
55
{
56
return saturate<DST>(x + y, roundf);
57
}
58
59
template<typename DST, typename SRC1, typename SRC2>
60
static inline DST sub(SRC1 x, SRC2 y)
61
{
62
return saturate<DST>(x - y, roundf);
63
}
64
65
template<typename DST, typename SRC1, typename SRC2>
66
static inline DST subr(SRC1 x, SRC2 y)
67
{
68
return saturate<DST>(y - x, roundf); // reverse: y - x
69
}
70
71
template<typename DST, typename SRC1, typename SRC2>
72
static inline DST mul(SRC1 x, SRC2 y, float scale=1)
73
{
74
auto result = scale * x * y;
75
return saturate<DST>(result, rintf);
76
}
77
78
template<typename DST, typename SRC1, typename SRC2>
79
static inline DST div(SRC1 x, SRC2 y, float scale=1)
80
{
81
// like OpenCV: returns 0, if y=0
82
auto result = y? scale * x / y: 0;
83
return saturate<DST>(result, rintf);
84
}
85
86
template<typename DST, typename SRC1, typename SRC2>
87
static inline DST divr(SRC1 x, SRC2 y, float scale=1)
88
{
89
auto result = x? scale * y / x: 0; // reverse: y / x
90
return saturate<DST>(result, rintf);
91
}
92
93
//---------------------------
94
//
95
// Fluid kernels: addWeighted
96
//
97
//---------------------------
98
99
template<typename DST, typename SRC1, typename SRC2>
100
static void run_addweighted(Buffer &dst, const View &src1, const View &src2,
101
double alpha, double beta, double gamma)
102
{
103
static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");
104
105
const auto *in1 = src1.InLine<SRC1>(0);
106
const auto *in2 = src2.InLine<SRC2>(0);
107
auto *out = dst.OutLine<DST>();
108
109
int width = dst.length();
110
int chan = dst.meta().chan;
111
int length = width * chan;
112
113
// NB: assume in/out types are not 64-bits
114
auto _alpha = static_cast<float>( alpha );
115
auto _beta = static_cast<float>( beta );
116
auto _gamma = static_cast<float>( gamma );
117
118
for (int l=0; l < length; l++)
119
out[l] = addWeighted<DST>(in1[l], in2[l], _alpha, _beta, _gamma);
120
}
121
122
// Fluid kernel for cv::gapi::core::GAddW (addWeighted).
GAPI_FLUID_KERNEL(GFluidAddW, cv::gapi::core::GAddW, false)
{
    // Point-wise op: consumes one input line per output line.
    static const int Window = 1;

    static void run(const View &src1, double alpha, const View &src2,
                                      double beta, double gamma, int /*dtype*/,
                    Buffer &dst)
    {
        // BINARY_ (gfluidutils.hpp) presumably matches the actual dst/src
        // depths against the listed types and returns after running the op
        // on a match -- TODO confirm against the macro definition.
        //      DST     SRC1    SRC2    OP               __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_(uchar , ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_(uchar ,  short,  short, run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_( short,  short,  short, run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_(ushort, ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_( float, uchar , uchar , run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_( float, ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);
        BINARY_( float,  short,  short, run_addweighted, dst, src1, src2, alpha, beta, gamma);

        // Nothing matched: reject the type combination.
        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
143
144
//--------------------------
145
//
146
// Fluid kernels: +, -, *, /
147
//
148
//--------------------------
149
150
enum Arithm { ARITHM_ABSDIFF, ARITHM_ADD, ARITHM_SUBTRACT, ARITHM_MULTIPLY, ARITHM_DIVIDE };
151
152
template<typename DST, typename SRC1, typename SRC2>
153
static void run_arithm(Buffer &dst, const View &src1, const View &src2, Arithm arithm,
154
double scale=1)
155
{
156
static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");
157
158
const auto *in1 = src1.InLine<SRC1>(0);
159
const auto *in2 = src2.InLine<SRC2>(0);
160
auto *out = dst.OutLine<DST>();
161
162
int width = dst.length();
163
int chan = dst.meta().chan;
164
int length = width * chan;
165
166
// NB: assume in/out types are not 64-bits
167
float _scale = static_cast<float>( scale );
168
169
switch (arithm)
170
{
171
case ARITHM_ABSDIFF:
172
for (int l=0; l < length; l++)
173
out[l] = absdiff<DST>(in1[l], in2[l]);
174
break;
175
case ARITHM_ADD:
176
for (int l=0; l < length; l++)
177
out[l] = add<DST>(in1[l], in2[l]);
178
break;
179
case ARITHM_SUBTRACT:
180
for (int l=0; l < length; l++)
181
out[l] = sub<DST>(in1[l], in2[l]);
182
break;
183
case ARITHM_MULTIPLY:
184
for (int l=0; l < length; l++)
185
out[l] = mul<DST>(in1[l], in2[l], _scale);
186
break;
187
case ARITHM_DIVIDE:
188
for (int l=0; l < length; l++)
189
out[l] = div<DST>(in1[l], in2[l], _scale);
190
break;
191
default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
192
}
193
}
194
195
// Fluid kernel for cv::gapi::core::GAdd: per-element saturated addition.
GAPI_FLUID_KERNEL(GFluidAdd, cv::gapi::core::GAdd, false)
{
    static const int Window = 1; // point-wise: one line in, one line out

    static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst)
    {
        // Type-dispatch table; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD);
        BINARY_(uchar ,  short,  short, run_arithm, dst, src1, src2, ARITHM_ADD);
        BINARY_(uchar ,  float,  float, run_arithm, dst, src1, src2, ARITHM_ADD);
        BINARY_( short,  short,  short, run_arithm, dst, src1, src2, ARITHM_ADD);
        BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD);
        BINARY_( float,  short,  short, run_arithm, dst, src1, src2, ARITHM_ADD);
        BINARY_( float,  float,  float, run_arithm, dst, src1, src2, ARITHM_ADD);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
213
214
// Fluid kernel for cv::gapi::core::GSub: per-element saturated subtraction.
GAPI_FLUID_KERNEL(GFluidSub, cv::gapi::core::GSub, false)
{
    static const int Window = 1; // point-wise: one line in, one line out

    static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst)
    {
        // Type-dispatch table; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
        BINARY_(uchar ,  short,  short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
        BINARY_(uchar ,  float,  float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
        BINARY_( short,  short,  short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
        BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
        BINARY_( float,  short,  short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
        BINARY_( float,  float,  float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
232
233
// Fluid kernel for cv::gapi::core::GMul: per-element scaled multiplication.
GAPI_FLUID_KERNEL(GFluidMul, cv::gapi::core::GMul, false)
{
    static const int Window = 1; // point-wise: one line in, one line out

    static void run(const View &src1, const View &src2, double scale, int /*dtype*/, Buffer &dst)
    {
        // Type-dispatch table; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
        BINARY_(uchar ,  short,  short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
        BINARY_(uchar ,  float,  float, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
        BINARY_( short,  short,  short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
        BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
        BINARY_( float,  short,  short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
        BINARY_( float,  float,  float, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
251
252
// Fluid kernel for cv::gapi::core::GDiv: per-element scaled division
// (x/0 yields 0, as in OpenCV's cv::divide).
GAPI_FLUID_KERNEL(GFluidDiv, cv::gapi::core::GDiv, false)
{
    static const int Window = 1; // point-wise: one line in, one line out

    static void run(const View &src1, const View &src2, double scale, int /*dtype*/, Buffer &dst)
    {
        // Type-dispatch table; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
        BINARY_(uchar ,  short,  short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
        BINARY_(uchar ,  float,  float, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
        BINARY_( short,  short,  short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
        BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
        BINARY_( float,  short,  short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
        BINARY_( float,  float,  float, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
270
271
// Fluid kernel for cv::gapi::core::GAbsDiff: per-element |src1 - src2|.
// Note: unlike add/sub, inputs and output share the same depth.
GAPI_FLUID_KERNEL(GFluidAbsDiff, cv::gapi::core::GAbsDiff, false)
{
    static const int Window = 1; // point-wise: one line in, one line out

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        // Type-dispatch table; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
        BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
        BINARY_( short,  short,  short, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
        BINARY_( float,  float,  float, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
286
287
//--------------------------------------
288
//
289
// Fluid kernels: +, -, *, / with Scalar
290
//
291
//--------------------------------------
292
293
// Primitive add/sub operations used by the hand-vectorized scalar kernels:
// v_* work on whole SIMD vectors, s_* are their per-element tail equivalents.
// The *_subr variants compute the reversed operation (scalar - pixel).
static inline v_uint16x8 v_add_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x + y; }
static inline v_uint16x8 v_sub_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x - y; }
static inline v_uint16x8 v_subr_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return y - x; } // reversed

static inline v_float32x4 v_add_32f(const v_float32x4 &x, const v_float32x4 &y) { return x + y; }
static inline v_float32x4 v_sub_32f(const v_float32x4 &x, const v_float32x4 &y) { return x - y; }
static inline v_float32x4 v_subr_32f(const v_float32x4 &x, const v_float32x4 &y) { return y - x; } // reversed

// The 8-bit scalar ops return int so the caller can saturate afterwards.
static inline int s_add_8u(uchar x, uchar y) { return x + y; }
static inline int s_sub_8u(uchar x, uchar y) { return x - y; }
static inline int s_subr_8u(uchar x, uchar y) { return y - x; } // reversed

static inline float s_add_32f(float x, float y) { return x + y; }
static inline float s_sub_32f(float x, float y) { return x - y; }
static inline float s_subr_32f(float x, float y) { return y - x; } // reversed
308
309
// Manual SIMD for the important 8UC3 case: out = op(in, per-channel scalar).
// v_op is applied on u8 lanes widened to u16 (so the op cannot wrap);
// s_op handles the scalar tail. scalar[] holds one value per channel.
static void run_arithm_s3(uchar out[], const uchar in[], int width, const uchar scalar[],
                          v_uint16x8 (*v_op)(const v_uint16x8&, const v_uint16x8&),
                          int (*s_op)(uchar, uchar))
{
    int w = 0;

#if CV_SIMD128
    // 16 pixels = 48 interleaved bytes per iteration.
    for (; w <= width-16; w+=16)
    {
        v_uint8x16 x, y, z;
        v_load_deinterleave(&in[3*w], x, y, z);

        v_uint16x8 r0, r1;

        // Widen, apply op, pack back (v_pack narrows u16->u8 with saturation).
        v_expand(x, r0, r1);
        r0 = v_op(r0, v_setall_u16(scalar[0])); // x op scalar[0]
        r1 = v_op(r1, v_setall_u16(scalar[0]));
        x = v_pack(r0, r1);

        v_expand(y, r0, r1);
        r0 = v_op(r0, v_setall_u16(scalar[1])); // y op scalar[1]
        r1 = v_op(r1, v_setall_u16(scalar[1]));
        y = v_pack(r0, r1);

        v_expand(z, r0, r1);
        r0 = v_op(r0, v_setall_u16(scalar[2])); // z op scalar[2]
        r1 = v_op(r1, v_setall_u16(scalar[2]));
        z = v_pack(r0, r1);

        v_store_interleave(&out[3*w], x, y, z);
    }
#endif
    UNUSED(v_op); // keeps the parameter "used" when CV_SIMD128 is off
    // Scalar tail (or the whole row when SIMD is unavailable).
    for (; w < width; w++)
    {
        out[3*w    ] = saturate<uchar>( s_op(in[3*w    ], scalar[0]) );
        out[3*w + 1] = saturate<uchar>( s_op(in[3*w + 1], scalar[1]) );
        out[3*w + 2] = saturate<uchar>( s_op(in[3*w + 2], scalar[2]) );
    }
}
350
351
// Manual SIMD for rounding 32F input into 8U output, single channel:
// out = saturate(round(op(in, scalar[0]))). Only scalar[0] is used.
static void run_arithm_s1(uchar out[], const float in[], int width, const float scalar[],
                          v_float32x4 (*v_op)(const v_float32x4&, const v_float32x4&),
                          float (*s_op)(float, float))
{
    int w = 0;

#if CV_SIMD128
    // 16 floats per iteration, as four 4-lane vectors.
    for (; w <= width-16; w+=16)
    {
        v_float32x4 r0, r1, r2, r3;
        r0 = v_load(&in[w     ]);
        r1 = v_load(&in[w +  4]);
        r2 = v_load(&in[w +  8]);
        r3 = v_load(&in[w + 12]);

        r0 = v_op(r0, v_setall_f32(scalar[0])); // r op scalar[0]
        r1 = v_op(r1, v_setall_f32(scalar[0]));
        r2 = v_op(r2, v_setall_f32(scalar[0]));
        r3 = v_op(r3, v_setall_f32(scalar[0]));

        // Round to nearest integer...
        v_int32x4 i0, i1, i2, i3;
        i0 = v_round(r0);
        i1 = v_round(r1);
        i2 = v_round(r2);
        i3 = v_round(r3);

        // ...then narrow s32 -> u16 -> u8 with saturation at each step.
        v_uint16x8 us0, us1;
        us0 = v_pack_u(i0, i1);
        us1 = v_pack_u(i2, i3);

        v_uint8x16 uc;
        uc = v_pack(us0, us1);

        v_store(&out[w], uc);
    }
#endif
    UNUSED(v_op); // keeps the parameter "used" when CV_SIMD128 is off
    // Scalar tail: round and saturate each element into 8U.
    for (; w < width; w++)
    {
        out[w] = saturate<uchar>(s_op(in[w], scalar[0]), std::roundf);
    }
}
394
395
// Thin bindings of the hand-vectorized helpers to concrete operations.
// *3 variants: 8UC3 image op scalar; *1 variants: 32FC1 rounded into 8U.

static void run_arithm_s_add3(uchar out[], const uchar in[], int width, const uchar scalar[])
{
    run_arithm_s3(out, in, width, scalar, v_add_16u, s_add_8u);
}

static void run_arithm_s_sub3(uchar out[], const uchar in[], int width, const uchar scalar[])
{
    run_arithm_s3(out, in, width, scalar, v_sub_16u, s_sub_8u);
}

static void run_arithm_s_subr3(uchar out[], const uchar in[], int width, const uchar scalar[])
{
    run_arithm_s3(out, in, width, scalar, v_subr_16u, s_subr_8u); // reversed: scalar - in
}

static void run_arithm_s_add1(uchar out[], const float in[], int width, const float scalar[])
{
    run_arithm_s1(out, in, width, scalar, v_add_32f, s_add_32f);
}

static void run_arithm_s_sub1(uchar out[], const float in[], int width, const float scalar[])
{
    run_arithm_s1(out, in, width, scalar, v_sub_32f, s_sub_32f);
}

static void run_arithm_s_subr1(uchar out[], const float in[], int width, const float scalar[])
{
    run_arithm_s1(out, in, width, scalar, v_subr_32f, s_subr_32f); // reversed: scalar - in
}
424
425
// manually unroll the inner cycle by channels
426
template<typename DST, typename SRC, typename SCALAR, typename FUNC>
427
static void run_arithm_s(DST out[], const SRC in[], int width, int chan,
428
const SCALAR scalar[4], FUNC func)
429
{
430
if (chan == 4)
431
{
432
for (int w=0; w < width; w++)
433
{
434
out[4*w + 0] = func(in[4*w + 0], scalar[0]);
435
out[4*w + 1] = func(in[4*w + 1], scalar[1]);
436
out[4*w + 2] = func(in[4*w + 2], scalar[2]);
437
out[4*w + 3] = func(in[4*w + 3], scalar[3]);
438
}
439
}
440
else
441
if (chan == 3)
442
{
443
for (int w=0; w < width; w++)
444
{
445
out[3*w + 0] = func(in[3*w + 0], scalar[0]);
446
out[3*w + 1] = func(in[3*w + 1], scalar[1]);
447
out[3*w + 2] = func(in[3*w + 2], scalar[2]);
448
}
449
}
450
else
451
if (chan == 2)
452
{
453
for (int w=0; w < width; w++)
454
{
455
out[2*w + 0] = func(in[2*w + 0], scalar[0]);
456
out[2*w + 1] = func(in[2*w + 1], scalar[1]);
457
}
458
}
459
else
460
if (chan == 1)
461
{
462
for (int w=0; w < width; w++)
463
{
464
out[w] = func(in[w], scalar[0]);
465
}
466
}
467
else
468
CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
469
}
470
471
// Per-line "image op scalar" dispatcher.
// When the float scalar converts to SRC exactly ("usemyscal"), the SRC-typed
// copy is used instead, which enables the hand-vectorized 8UC3 / 32F->8U
// fast paths for add and subtract.
template<typename DST, typename SRC>
static void run_arithm_s(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
                         float scale=1)
{
    const auto *in  = src.InLine<SRC>(0);
          auto *out = dst.OutLine<DST>();

    int width  = dst.length();
    int chan   = dst.meta().chan;

    // What if we cast the scalar into the SRC type?
    const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),
                            static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };
    // Use the cast copy only if it represents every component exactly.
    bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&
                     (myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);

    switch (arithm)
    {
    case ARITHM_ABSDIFF:
        for (int w=0; w < width; w++)
            for (int c=0; c < chan; c++)
                out[chan*w + c] = absdiff<DST>(in[chan*w + c], scalar[c]);
        break;
    case ARITHM_ADD:
        if (usemyscal)
        {
            // Hand-vectorized path: 8U image + 8U scalar, 3 channels
            if (std::is_same<DST,uchar>::value &&
                std::is_same<SRC,uchar>::value &&
                chan == 3)
                run_arithm_s_add3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
            // Hand-vectorized path: 32F image rounded into 8U, 1 channel
            else if (std::is_same<DST,uchar>::value &&
                     std::is_same<SRC,float>::value &&
                     chan == 1)
                run_arithm_s_add1((uchar*)out, (const float*)in, width, (const float*)myscal);
            else
                run_arithm_s(out, in, width, chan, myscal, add<DST,SRC,SRC>);
        }
        else
            run_arithm_s(out, in, width, chan, scalar, add<DST,SRC,float>);
        break;
    case ARITHM_SUBTRACT:
        if (usemyscal)
        {
            // Hand-vectorized path: 8U image - 8U scalar, 3 channels
            if (std::is_same<DST,uchar>::value &&
                std::is_same<SRC,uchar>::value &&
                chan == 3)
                run_arithm_s_sub3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
            // Hand-vectorized path: 32F image rounded into 8U, 1 channel
            else if (std::is_same<DST,uchar>::value &&
                     std::is_same<SRC,float>::value &&
                     chan == 1)
                run_arithm_s_sub1((uchar*)out, (const float*)in, width, (const float*)myscal);
            else
                run_arithm_s(out, in, width, chan, myscal, sub<DST,SRC,SRC>);
        }
        else
            run_arithm_s(out, in, width, chan, scalar, sub<DST,SRC,float>);
        break;
    // TODO: optimize multiplication and division
    case ARITHM_MULTIPLY:
        for (int w=0; w < width; w++)
            for (int c=0; c < chan; c++)
                out[chan*w + c] = mul<DST>(in[chan*w + c], scalar[c], scale);
        break;
    case ARITHM_DIVIDE:
        for (int w=0; w < width; w++)
            for (int c=0; c < chan; c++)
                out[chan*w + c] = div<DST>(in[chan*w + c], scalar[c], scale);
        break;
    default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
    }
}
542
543
// Per-line "scalar op image" (reversed operands) dispatcher.
// Only the non-commutative ops are needed here: subtract and divide.
// Same "usemyscal" fast-path logic as run_arithm_s above.
template<typename DST, typename SRC>
static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
                          float scale=1)
{
    const auto *in  = src.InLine<SRC>(0);
          auto *out = dst.OutLine<DST>();

    int width  = dst.length();
    int chan   = dst.meta().chan;

    // What if we cast the scalar into the SRC type?
    const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),
                            static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };
    // Use the cast copy only if it represents every component exactly.
    bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&
                     (myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);

    switch (arithm)
    {
    case ARITHM_SUBTRACT:
        if (usemyscal)
        {
            // Hand-vectorized path: 8U scalar - 8U image, 3 channels
            if (std::is_same<DST,uchar>::value &&
                std::is_same<SRC,uchar>::value &&
                chan == 3)
                run_arithm_s_subr3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
            // Hand-vectorized path: scalar - 32FC1 image, rounded into 8U
            else if (std::is_same<DST,uchar>::value &&
                     std::is_same<SRC,float>::value &&
                     chan == 1)
                run_arithm_s_subr1((uchar*)out, (const float*)in, width, (const float*)myscal);
            else
                run_arithm_s(out, in, width, chan, myscal, subr<DST,SRC,SRC>);
        }
        else
            run_arithm_s(out, in, width, chan, scalar, subr<DST,SRC,float>);
        break;
    // TODO: optimize division
    case ARITHM_DIVIDE:
        for (int w=0; w < width; w++)
            for (int c=0; c < chan; c++)
                out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale);
        break;
    default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
    }
}
587
588
// Fluid kernel for cv::gapi::core::GAbsDiffC: |src - scalar| per element.
GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, false)
{
    static const int Window = 1;

    static void run(const View &src, const cv::Scalar &_scalar, Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };

        // Type-dispatch table; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);
        UNARY_(ushort, ushort, run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);
        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
609
610
// Fluid kernel for cv::gapi::core::GAddC: dst = saturate(src + scalar).
GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, false)
{
    static const int Window = 1;

    static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };

        // Type-dispatch table; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_ADD);
        UNARY_(uchar ,  short, run_arithm_s, dst, src, scalar, ARITHM_ADD);
        UNARY_(uchar ,  float, run_arithm_s, dst, src, scalar, ARITHM_ADD);
        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_ADD);
        UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_ADD);
        UNARY_( float,  short, run_arithm_s, dst, src, scalar, ARITHM_ADD);
        UNARY_( float,  float, run_arithm_s, dst, src, scalar, ARITHM_ADD);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
635
636
// Fluid kernel for cv::gapi::core::GSubC: dst = saturate(src - scalar).
GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, false)
{
    static const int Window = 1;

    static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };

        // Type-dispatch table; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_(uchar ,  short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_(uchar ,  float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( float,  short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( float,  float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
661
662
// Fluid kernel for cv::gapi::core::GSubRC: reversed subtraction,
// dst = saturate(scalar - src). Note the scalar comes first in run().
GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, false)
{
    static const int Window = 1;

    static void run(const cv::Scalar &_scalar, const View &src, int /*dtype*/, Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };

        // Type-dispatch table; run_arithm_rs applies the reversed op.
        //     DST     SRC     OP             __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_(uchar ,  short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_(uchar ,  float, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( short,  short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( float,  short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
        UNARY_( float,  float, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
687
688
// Fluid kernel for cv::gapi::core::GMulC: dst = saturate(src * scalar).
GAPI_FLUID_KERNEL(GFluidMulC, cv::gapi::core::GMulC, false)
{
    static const int Window = 1;

    static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };
        // GMulC carries no scale argument, so the scale is fixed to 1.
        const float scale = 1.f;

        // Type-dispatch table; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_(uchar ,  short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_(uchar ,  float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( float,  short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( float,  float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
714
715
// Fluid kernel for cv::gapi::core::GMulCOld: legacy multiply-by-constant
// where a single double is broadcast to all four channels.
GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, false)
{
    static const int Window = 1;

    static void run(const View &src, double _scalar, int /*dtype*/, Buffer &dst)
    {
        // Broadcast the single constant across all channels.
        const float scalar[4] = {
            static_cast<float>(_scalar),
            static_cast<float>(_scalar),
            static_cast<float>(_scalar),
            static_cast<float>(_scalar)
        };
        // No scale argument in this legacy op, so it is fixed to 1.
        const float scale = 1.f;

        // Type-dispatch table; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_(uchar ,  short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_(uchar ,  float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( float,  short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
        UNARY_( float,  float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
741
742
// Fluid kernel for cv::gapi::core::GDivC: dst = saturate(scale * src / scalar),
// with division by zero yielding 0 (OpenCV convention).
GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, false)
{
    static const int Window = 1;

    static void run(const View &src, const cv::Scalar &_scalar, double _scale, int /*dtype*/,
                    Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };
        const float scale = static_cast<float>(_scale);

        // Type-dispatch table; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_(uchar ,  short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_(uchar ,  float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( short,  short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( float,  short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( float,  float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
769
770
// Fluid kernel for cv::gapi::core::GDivRC: reversed division,
// dst = saturate(scale * scalar / src). Scalar comes first in run().
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)
{
    static const int Window = 1;

    static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/,
                    Buffer &dst)
    {
        // Narrow the double-precision cv::Scalar to float once, up front.
        const float scalar[4] = {
            static_cast<float>(_scalar[0]),
            static_cast<float>(_scalar[1]),
            static_cast<float>(_scalar[2]),
            static_cast<float>(_scalar[3])
        };
        const float scale = static_cast<float>(_scale);

        // Type-dispatch table; run_arithm_rs applies the reversed op.
        //     DST     SRC     OP             __VA_ARGS__
        UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_(uchar ,  short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_(uchar ,  float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( short,  short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( float,  short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
        UNARY_( float,  float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
797
798
//----------------------------
799
//
800
// Fluid math kernels: bitwise
801
//
802
//----------------------------
803
804
enum Bitwise { BW_AND, BW_OR, BW_XOR, BW_NOT };
805
806
template<typename DST, typename SRC1, typename SRC2>
807
static void run_bitwise2(Buffer &dst, const View &src1, const View &src2, Bitwise bitwise)
808
{
809
static_assert(std::is_same<DST, SRC1>::value, "wrong types");
810
static_assert(std::is_same<DST, SRC2>::value, "wrong types");
811
812
const auto *in1 = src1.InLine<SRC1>(0);
813
const auto *in2 = src2.InLine<SRC2>(0);
814
auto *out = dst.OutLine<DST>();
815
816
int width = dst.length();
817
int chan = dst.meta().chan;
818
int length = width * chan;
819
820
switch (bitwise)
821
{
822
case BW_AND:
823
for (int l=0; l < length; l++)
824
out[l] = in1[l] & in2[l];
825
break;
826
case BW_OR:
827
for (int l=0; l < length; l++)
828
out[l] = in1[l] | in2[l];
829
break;
830
case BW_XOR:
831
for (int l=0; l < length; l++)
832
out[l] = in1[l] ^ in2[l];
833
break;
834
default: CV_Error(cv::Error::StsBadArg, "unsupported bitwise operation");
835
}
836
}
837
838
template<typename DST, typename SRC>
839
static void run_bitwise1(Buffer &dst, const View &src, Bitwise bitwise)
840
{
841
static_assert(std::is_same<DST, SRC>::value, "wrong types");
842
843
const auto *in = src.InLine<SRC>(0);
844
auto *out = dst.OutLine<DST>();
845
846
int width = dst.length();
847
int chan = dst.meta().chan;
848
int length = width * chan;
849
850
switch (bitwise)
851
{
852
case BW_NOT:
853
for (int l=0; l < length; l++)
854
out[l] = ~in[l];
855
break;
856
default: CV_Error(cv::Error::StsBadArg, "unsupported bitwise operation");
857
}
858
}
859
860
// Fluid kernel for cv::gapi::core::GAnd: per-element bitwise AND.
GAPI_FLUID_KERNEL(GFluidAnd, cv::gapi::core::GAnd, false)
{
    static const int Window = 1;

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        // Integer depths only; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP            __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_AND);
        BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_AND);
        BINARY_( short,  short,  short, run_bitwise2, dst, src1, src2, BW_AND);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
875
876
// Fluid kernel for cv::gapi::core::GOr: per-element bitwise OR.
GAPI_FLUID_KERNEL(GFluidOr, cv::gapi::core::GOr, false)
{
    static const int Window = 1;

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        // Integer depths only; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP            __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_OR);
        BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_OR);
        BINARY_( short,  short,  short, run_bitwise2, dst, src1, src2, BW_OR);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
891
892
// Fluid kernel for cv::gapi::core::GXor: per-element bitwise XOR.
GAPI_FLUID_KERNEL(GFluidXor, cv::gapi::core::GXor, false)
{
    static const int Window = 1;

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        // Integer depths only; BINARY_ returns on a depth match.
        //      DST     SRC1    SRC2    OP            __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_XOR);
        BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_XOR);
        BINARY_( short,  short,  short, run_bitwise2, dst, src1, src2, BW_XOR);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
907
908
// Fluid kernel for cv::gapi::core::GNot: per-element bitwise NOT.
GAPI_FLUID_KERNEL(GFluidNot, cv::gapi::core::GNot, false)
{
    static const int Window = 1;

    static void run(const View &src, Buffer &dst)
    {
        // Integer depths only; UNARY_ returns on a depth match.
        //     DST     SRC     OP            __VA_ARGS__
        UNARY_(uchar , uchar , run_bitwise1, dst, src, BW_NOT);
        UNARY_(ushort, ushort, run_bitwise1, dst, src, BW_NOT);
        UNARY_( short,  short, run_bitwise1, dst, src, BW_NOT);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
922
923
//-------------------
924
//
925
// Fluid kernels: LUT
926
//
927
//-------------------
928
929
// 8-bit look-up table transform: out[i] = lut[in[i]], applied to every
// element of every channel.  Both src and dst must be CV_8U; the LUT must
// hold exactly 256 bytes.
GAPI_FLUID_KERNEL(GFluidLUT, cv::gapi::core::GLUT, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Mat& lut, Buffer &dst)
    {
        GAPI_Assert(CV_8U == dst.meta().depth);
        GAPI_Assert(CV_8U == src.meta().depth);

        GAPI_DbgAssert(CV_8U == lut.type());
        GAPI_DbgAssert(256 == lut.cols * lut.rows);
        GAPI_DbgAssert(dst.length() == src.length());
        GAPI_DbgAssert(dst.meta().chan == src.meta().chan);

        const auto *in  = src.InLine<uchar>(0);
              auto *out = dst.OutLine<uchar>();

        int width  = dst.length();
        int chan   = dst.meta().chan;
        int length = width * chan; // total elements in the row (all channels)

        for (int l=0; l < length; l++)
            out[l] = lut.data[ in[l] ];
    }
};
954
955
//-------------------------
956
//
957
// Fluid kernels: convertTo
958
//
959
//-------------------------
960
961
// Depth conversion with optional linear transform: out = saturate(in*alpha + beta).
// alpha/beta are narrowed to float, so this must not be used for 64-bit types.
// The alpha==1 && beta==0 fast path avoids the multiply/add entirely and uses
// manual SIMD for the float->uchar/ushort rounding case.
template<typename DST, typename SRC>
static void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta)
{
    const auto *in  = src.InLine<SRC>(0);
          auto *out = dst.OutLine<DST>();

    int width  = dst.length();
    int chan   = dst.meta().chan;
    int length = width * chan;

    // NB: don't do this if SRC or DST is 64-bit
    auto alpha = static_cast<float>( _alpha );
    auto beta  = static_cast<float>( _beta  );

    // compute faster if no alpha no beta
    if (alpha == 1 && beta == 0)
    {
        // manual SIMD if need rounding (float source, integer destination)
        if (std::is_integral<DST>::value && std::is_floating_point<SRC>::value)
        {
            GAPI_Assert(( std::is_same<SRC,float>::value ));

            int l = 0; // cycle index

        #if CV_SIMD128
            if (std::is_same<DST,uchar>::value)
            {
                // 16 floats -> round to int32 -> pack to uint16 -> pack to uint8
                for (; l <= length-16; l+=16)
                {
                    v_int32x4 i0, i1, i2, i3;
                    i0 = v_round( v_load( (float*)& in[l     ] ) );
                    i1 = v_round( v_load( (float*)& in[l +  4] ) );
                    i2 = v_round( v_load( (float*)& in[l +  8] ) );
                    i3 = v_round( v_load( (float*)& in[l + 12] ) );

                    v_uint16x8 us0, us1;
                    us0 = v_pack_u(i0, i1);
                    us1 = v_pack_u(i2, i3);

                    v_uint8x16 uc;
                    uc = v_pack(us0, us1);
                    v_store((uchar*)& out[l], uc);
                }
            }
            if (std::is_same<DST,ushort>::value)
            {
                // 8 floats -> round to int32 -> pack (saturating) to uint16
                for (; l <= length-8; l+=8)
                {
                    v_int32x4 i0, i1;
                    i0 = v_round( v_load( (float*)& in[l    ] ) );
                    i1 = v_round( v_load( (float*)& in[l + 4] ) );

                    v_uint16x8 us;
                    us = v_pack_u(i0, i1);
                    v_store((ushort*)& out[l], us);
                }
            }
        #endif

            // tail of SIMD cycle (also the whole loop when SIMD is disabled)
            for (; l < length; l++)
            {
                out[l] = saturate<DST>(in[l], rintf);
            }
        }
        else if (std::is_integral<DST>::value) // here SRC is integral
        {
            for (int l=0; l < length; l++)
            {
                out[l] = saturate<DST>(in[l]);
            }
        }
        else // DST is floating-point, SRC is any
        {
            for (int l=0; l < length; l++)
            {
                out[l] = static_cast<DST>(in[l]);
            }
        }
    }
    else // if alpha or beta is non-trivial
    {
        // TODO: optimize if alpha and beta and data are integral
        for (int l=0; l < length; l++)
        {
            out[l] = saturate<DST>(in[l]*alpha + beta, rintf);
        }
    }
}
1050
1051
// convertTo: depth conversion with optional linear scaling (alpha, beta).
// The supported (dst, src) depth pairs are enumerated below; rtype is
// implied by dst's meta and therefore unused here.
GAPI_FLUID_KERNEL(GFluidConvertTo, cv::gapi::core::GConvertTo, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, int /*rtype*/, double alpha, double beta, Buffer &dst)
    {
        //     DST     SRC     OP             __VA_ARGS__
        UNARY_(uchar , uchar , run_convertto, dst, src, alpha, beta);
        UNARY_(uchar , ushort, run_convertto, dst, src, alpha, beta);
        UNARY_(uchar ,  float, run_convertto, dst, src, alpha, beta);
        UNARY_(ushort, uchar , run_convertto, dst, src, alpha, beta);
        UNARY_(ushort, ushort, run_convertto, dst, src, alpha, beta);
        UNARY_(ushort,  float, run_convertto, dst, src, alpha, beta);
        UNARY_( float, uchar , run_convertto, dst, src, alpha, beta);
        UNARY_( float, ushort, run_convertto, dst, src, alpha, beta);
        UNARY_( float,  float, run_convertto, dst, src, alpha, beta);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1071
1072
//-----------------------------
1073
//
1074
// Fluid math kernels: min, max
1075
//
1076
//-----------------------------
1077
1078
// Operation selector for run_minmax().
enum Minmax { MM_MIN, MM_MAX };
1079
1080
template<typename DST, typename SRC1, typename SRC2>
1081
static void run_minmax(Buffer &dst, const View &src1, const View &src2, Minmax minmax)
1082
{
1083
static_assert(std::is_same<DST, SRC1>::value, "wrong types");
1084
static_assert(std::is_same<DST, SRC2>::value, "wrong types");
1085
1086
const auto *in1 = src1.InLine<SRC1>(0);
1087
const auto *in2 = src2.InLine<SRC2>(0);
1088
auto *out = dst.OutLine<DST>();
1089
1090
int width = dst.length();
1091
int chan = dst.meta().chan;
1092
1093
int length = width * chan;
1094
1095
switch (minmax)
1096
{
1097
case MM_MIN:
1098
for (int l=0; l < length; l++)
1099
out[l] = in1[l] < in2[l]? in1[l]: in2[l];
1100
break;
1101
case MM_MAX:
1102
for (int l=0; l < length; l++)
1103
out[l] = in1[l] > in2[l]? in1[l]: in2[l];
1104
break;
1105
default: CV_Error(cv::Error::StsBadArg, "unsupported min/max operation");
1106
}
1107
}
1108
1109
// Element-wise minimum of two same-typed images; dispatch via BINARY_.
GAPI_FLUID_KERNEL(GFluidMin, cv::gapi::core::GMin, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_minmax, dst, src1, src2, MM_MIN);
        BINARY_(ushort, ushort, ushort, run_minmax, dst, src1, src2, MM_MIN);
        BINARY_( short,  short,  short, run_minmax, dst, src1, src2, MM_MIN);
        BINARY_( float,  float,  float, run_minmax, dst, src1, src2, MM_MIN);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1124
1125
// Element-wise maximum of two same-typed images; dispatch via BINARY_.
GAPI_FLUID_KERNEL(GFluidMax, cv::gapi::core::GMax, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST     SRC1    SRC2    OP          __VA_ARGS__
        BINARY_(uchar , uchar , uchar , run_minmax, dst, src1, src2, MM_MAX);
        BINARY_(ushort, ushort, ushort, run_minmax, dst, src1, src2, MM_MAX);
        BINARY_( short,  short,  short, run_minmax, dst, src1, src2, MM_MAX);
        BINARY_( float,  float,  float, run_minmax, dst, src1, src2, MM_MAX);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1140
1141
//-----------------------
1142
//
1143
// Fluid kernels: compare
1144
//
1145
//-----------------------
1146
1147
// Predicate selector shared by the run_cmp() overloads below.
enum Compare { CMP_EQ, CMP_NE, CMP_GE, CMP_GT, CMP_LE, CMP_LT };
1148
1149
template<typename DST, typename SRC1, typename SRC2>
1150
static void run_cmp(Buffer &dst, const View &src1, const View &src2, Compare compare)
1151
{
1152
static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");
1153
static_assert(std::is_same<DST, uchar>::value, "wrong types");
1154
1155
const auto *in1 = src1.InLine<SRC1>(0);
1156
const auto *in2 = src2.InLine<SRC2>(0);
1157
auto *out = dst.OutLine<DST>();
1158
1159
int width = dst.length();
1160
int chan = dst.meta().chan;
1161
1162
int length = width * chan;
1163
1164
switch (compare)
1165
{
1166
case CMP_EQ:
1167
for (int l=0; l < length; l++)
1168
out[l] = in1[l] == in2[l]? 255: 0;
1169
break;
1170
case CMP_NE:
1171
for (int l=0; l < length; l++)
1172
out[l] = in1[l] != in2[l]? 255: 0;
1173
break;
1174
case CMP_GE:
1175
for (int l=0; l < length; l++)
1176
out[l] = in1[l] >= in2[l]? 255: 0;
1177
break;
1178
case CMP_LE:
1179
for (int l=0; l < length; l++)
1180
out[l] = in1[l] <= in2[l]? 255: 0;
1181
break;
1182
case CMP_GT:
1183
for (int l=0; l < length; l++)
1184
out[l] = in1[l] > in2[l]? 255: 0;
1185
break;
1186
case CMP_LT:
1187
for (int l=0; l < length; l++)
1188
out[l] = in1[l] < in2[l]? 255: 0;
1189
break;
1190
default:
1191
CV_Error(cv::Error::StsBadArg, "unsupported compare operation");
1192
}
1193
}
1194
1195
// Per-pixel equality compare of two images; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpEQ, cv::gapi::core::GCmpEQ, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST    SRC1   SRC2   OP       __VA_ARGS__
        BINARY_(uchar, uchar, uchar, run_cmp, dst, src1, src2, CMP_EQ);
        BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_EQ);
        BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_EQ);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1209
1210
// Per-pixel inequality compare of two images; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpNE, cv::gapi::core::GCmpNE, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST    SRC1   SRC2   OP       __VA_ARGS__
        BINARY_(uchar, uchar, uchar, run_cmp, dst, src1, src2, CMP_NE);
        BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_NE);
        BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_NE);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1224
1225
// Per-pixel ">=" compare of two images; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpGE, cv::gapi::core::GCmpGE, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST    SRC1   SRC2   OP       __VA_ARGS__
        BINARY_(uchar, uchar, uchar, run_cmp, dst, src1, src2, CMP_GE);
        BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_GE);
        BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_GE);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1239
1240
// Per-pixel ">" compare of two images; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpGT, cv::gapi::core::GCmpGT, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST    SRC1   SRC2   OP       __VA_ARGS__
        BINARY_(uchar, uchar, uchar, run_cmp, dst, src1, src2, CMP_GT);
        BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_GT);
        BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_GT);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1254
1255
// Per-pixel "<=" compare of two images; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpLE, cv::gapi::core::GCmpLE, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST    SRC1   SRC2   OP       __VA_ARGS__
        BINARY_(uchar, uchar, uchar, run_cmp, dst, src1, src2, CMP_LE);
        BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_LE);
        BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_LE);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1269
1270
// Per-pixel "<" compare of two images; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpLT, cv::gapi::core::GCmpLT, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, Buffer &dst)
    {
        //      DST    SRC1   SRC2   OP       __VA_ARGS__
        BINARY_(uchar, uchar, uchar, run_cmp, dst, src1, src2, CMP_LT);
        BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_LT);
        BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_LT);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1284
1285
//---------------------
1286
//
1287
// Compare with GScalar
1288
//
1289
//---------------------
1290
1291
template<typename DST, typename SRC, typename SCALAR=double>
1292
static void run_cmp(DST out[], const SRC in[], int length, Compare compare, SCALAR s)
1293
{
1294
switch (compare)
1295
{
1296
case CMP_EQ:
1297
for (int l=0; l < length; l++)
1298
out[l] = in[l] == s? 255: 0;
1299
break;
1300
case CMP_NE:
1301
for (int l=0; l < length; l++)
1302
out[l] = in[l] != s? 255: 0;
1303
break;
1304
case CMP_GE:
1305
for (int l=0; l < length; l++)
1306
out[l] = in[l] >= s? 255: 0;
1307
break;
1308
case CMP_LE:
1309
for (int l=0; l < length; l++)
1310
out[l] = in[l] <= s? 255: 0;
1311
break;
1312
case CMP_GT:
1313
for (int l=0; l < length; l++)
1314
out[l] = in[l] > s? 255: 0;
1315
break;
1316
case CMP_LT:
1317
for (int l=0; l < length; l++)
1318
out[l] = in[l] < s? 255: 0;
1319
break;
1320
default:
1321
CV_Error(cv::Error::StsBadArg, "unsupported compare operation");
1322
}
1323
}
1324
1325
template<typename DST, typename SRC>
1326
static void run_cmp(Buffer &dst, const View &src, Compare compare, const cv::Scalar &scalar)
1327
{
1328
static_assert(std::is_same<DST, uchar>::value, "wrong types");
1329
1330
const auto *in = src.InLine<SRC>(0);
1331
auto *out = dst.OutLine<DST>();
1332
1333
int width = dst.length();
1334
int chan = dst.meta().chan;
1335
1336
int length = width * chan;
1337
1338
// compute faster if scalar rounds to SRC
1339
double d = scalar[0] ;
1340
SRC s = static_cast<SRC>( scalar[0] );
1341
1342
if (s == d)
1343
run_cmp(out, in, length, compare, s);
1344
else
1345
run_cmp(out, in, length, compare, d);
1346
}
1347
1348
// Per-pixel "== scalar" compare; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpEQScalar, cv::gapi::core::GCmpEQScalar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
    {
        //     DST    SRC    OP       __VA_ARGS__
        UNARY_(uchar, uchar, run_cmp, dst, src, CMP_EQ, scalar);
        UNARY_(uchar, short, run_cmp, dst, src, CMP_EQ, scalar);
        UNARY_(uchar, float, run_cmp, dst, src, CMP_EQ, scalar);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1362
1363
// Per-pixel "!= scalar" compare; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpNEScalar, cv::gapi::core::GCmpNEScalar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
    {
        //     DST    SRC    OP       __VA_ARGS__
        UNARY_(uchar, uchar, run_cmp, dst, src, CMP_NE, scalar);
        UNARY_(uchar, short, run_cmp, dst, src, CMP_NE, scalar);
        UNARY_(uchar, float, run_cmp, dst, src, CMP_NE, scalar);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1377
1378
// Per-pixel ">= scalar" compare; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpGEScalar, cv::gapi::core::GCmpGEScalar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
    {
        //     DST    SRC    OP       __VA_ARGS__
        UNARY_(uchar, uchar, run_cmp, dst, src, CMP_GE, scalar);
        UNARY_(uchar, short, run_cmp, dst, src, CMP_GE, scalar);
        UNARY_(uchar, float, run_cmp, dst, src, CMP_GE, scalar);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1392
1393
// Per-pixel "> scalar" compare; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpGTScalar, cv::gapi::core::GCmpGTScalar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
    {
        //     DST    SRC    OP       __VA_ARGS__
        UNARY_(uchar, uchar, run_cmp, dst, src, CMP_GT, scalar);
        UNARY_(uchar, short, run_cmp, dst, src, CMP_GT, scalar);
        UNARY_(uchar, float, run_cmp, dst, src, CMP_GT, scalar);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1407
1408
// Per-pixel "<= scalar" compare; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpLEScalar, cv::gapi::core::GCmpLEScalar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
    {
        //     DST    SRC    OP       __VA_ARGS__
        UNARY_(uchar, uchar, run_cmp, dst, src, CMP_LE, scalar);
        UNARY_(uchar, short, run_cmp, dst, src, CMP_LE, scalar);
        UNARY_(uchar, float, run_cmp, dst, src, CMP_LE, scalar);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1422
1423
// Per-pixel "< scalar" compare; result is an 8U mask (255/0).
GAPI_FLUID_KERNEL(GFluidCmpLTScalar, cv::gapi::core::GCmpLTScalar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
    {
        //     DST    SRC    OP       __VA_ARGS__
        UNARY_(uchar, uchar, run_cmp, dst, src, CMP_LT, scalar);
        UNARY_(uchar, short, run_cmp, dst, src, CMP_LT, scalar);
        UNARY_(uchar, float, run_cmp, dst, src, CMP_LT, scalar);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1437
1438
//-------------------------
1439
//
1440
// Fluid kernels: threshold
1441
//
1442
//-------------------------
1443
1444
template<typename DST, typename SRC>
1445
static void run_threshold(Buffer &dst, const View &src, const cv::Scalar &thresh,
1446
const cv::Scalar &maxval,
1447
int type)
1448
{
1449
static_assert(std::is_same<DST, SRC>::value, "wrong types");
1450
1451
const auto *in = src.InLine<SRC>(0);
1452
auto *out = dst.OutLine<DST>();
1453
1454
int width = dst.length();
1455
int chan = dst.meta().chan;
1456
1457
int length = width * chan;
1458
1459
DST thresh_ = saturate<DST>(thresh[0], floord);
1460
DST threshd = saturate<DST>(thresh[0], roundd);
1461
DST maxvald = saturate<DST>(maxval[0], roundd);
1462
1463
switch (type)
1464
{
1465
case cv::THRESH_BINARY:
1466
for (int l=0; l < length; l++)
1467
out[l] = in[l] > thresh_? maxvald: 0;
1468
break;
1469
case cv::THRESH_BINARY_INV:
1470
for (int l=0; l < length; l++)
1471
out[l] = in[l] > thresh_? 0: maxvald;
1472
break;
1473
case cv::THRESH_TRUNC:
1474
for (int l=0; l < length; l++)
1475
out[l] = in[l] > thresh_? threshd: in[l];
1476
break;
1477
case cv::THRESH_TOZERO:
1478
for (int l=0; l < length; l++)
1479
out[l] = in[l] > thresh_? in[l]: 0;
1480
break;
1481
case cv::THRESH_TOZERO_INV:
1482
for (int l=0; l < length; l++)
1483
out[l] = in[l] > thresh_? 0: in[l];
1484
break;
1485
default: CV_Error(cv::Error::StsBadArg, "unsupported threshold type");
1486
}
1487
}
1488
1489
// threshold(): supported on 8U/16U/16S single- or multi-channel images;
// thresh and maxval arrive as Scalars (only element [0] is used downstream).
GAPI_FLUID_KERNEL(GFluidThreshold, cv::gapi::core::GThreshold, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &thresh,
                                     const cv::Scalar &maxval,
                                     int type,
                    Buffer &dst)
    {
        //     DST     SRC     OP             __VA_ARGS__
        UNARY_(uchar , uchar , run_threshold, dst, src, thresh, maxval, type);
        UNARY_(ushort, ushort, run_threshold, dst, src, thresh, maxval, type);
        UNARY_( short,  short, run_threshold, dst, src, thresh, maxval, type);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1506
1507
//------------------------
1508
//
1509
// Fluid kernels: in-range
1510
//
1511
//------------------------
1512
1513
// Manually vectorized inRange for the 3-channel 8-bit case: out[w] = 255 when
// all three channels of pixel w lie inside [lower, upper], else 0.
// lower/upper hold one bound per channel.
static void run_inrange3(uchar out[], const uchar in[], int width,
                         const uchar lower[], const uchar upper[])
{
    int w = 0; // cycle index

#if CV_SIMD128
    // Process 16 pixels per iteration: de-interleave the channels, then AND
    // together the per-channel range masks produced by the vector compares.
    for (; w <= width-16; w+=16)
    {
        v_uint8x16 i0, i1, i2;
        v_load_deinterleave(&in[3*w], i0, i1, i2);

        v_uint8x16 o;
        o = (i0 >= v_setall_u8(lower[0])) & (i0 <= v_setall_u8(upper[0])) &
            (i1 >= v_setall_u8(lower[1])) & (i1 <= v_setall_u8(upper[1])) &
            (i2 >= v_setall_u8(lower[2])) & (i2 <= v_setall_u8(upper[2]));

        v_store(&out[w], o);
    }
#endif

    // scalar tail (or the whole image when SIMD is disabled)
    for (; w < width; w++)
    {
        out[w] = in[3*w  ] >= lower[0] && in[3*w  ] <= upper[0] &&
                 in[3*w+1] >= lower[1] && in[3*w+1] <= upper[1] &&
                 in[3*w+2] >= lower[2] && in[3*w+2] <= upper[2] ? 255: 0;
    }
}
1540
1541
template<typename DST, typename SRC>
1542
static void run_inrange(Buffer &dst, const View &src, const cv::Scalar &upperb,
1543
const cv::Scalar &lowerb)
1544
{
1545
static_assert(std::is_same<DST, uchar>::value, "wrong types");
1546
static_assert(std::is_integral<SRC>::value, "wrong types");
1547
1548
const auto *in = src.InLine<SRC>(0);
1549
auto *out = dst.OutLine<DST>();
1550
1551
int width = src.length();
1552
int chan = src.meta().chan;
1553
GAPI_Assert(dst.meta().chan == 1);
1554
1555
// for integral input, in[i] >= lower equals in[i] >= ceil(lower)
1556
// so we can optimize compare operations by rounding lower/upper
1557
SRC lower[4], upper[4];
1558
for (int c=0; c < chan; c++)
1559
{
1560
lower[c] = saturate<SRC>(lowerb[c], ceild);
1561
upper[c] = saturate<SRC>(upperb[c], floord);
1562
}
1563
1564
// manually SIMD for important case if RGB/BGR
1565
if (std::is_same<SRC,uchar>::value && chan==3)
1566
{
1567
run_inrange3((uchar*)out, (const uchar*)in, width,
1568
(const uchar*)lower, (const uchar*)upper);
1569
return;
1570
}
1571
1572
// TODO: please manually SIMD if multiple channels:
1573
// modern compilers would perfectly vectorize this code if one channel,
1574
// but may need help with de-interleaving channels if RGB/BGR image etc
1575
switch (chan)
1576
{
1577
case 1:
1578
for (int w=0; w < width; w++)
1579
out[w] = in[w] >= lower[0] && in[w] <= upper[0]? 255: 0;
1580
break;
1581
case 2:
1582
for (int w=0; w < width; w++)
1583
out[w] = in[2*w ] >= lower[0] && in[2*w ] <= upper[0] &&
1584
in[2*w+1] >= lower[1] && in[2*w+1] <= upper[1] ? 255: 0;
1585
break;
1586
case 3:
1587
for (int w=0; w < width; w++)
1588
out[w] = in[3*w ] >= lower[0] && in[3*w ] <= upper[0] &&
1589
in[3*w+1] >= lower[1] && in[3*w+1] <= upper[1] &&
1590
in[3*w+2] >= lower[2] && in[3*w+2] <= upper[2] ? 255: 0;
1591
break;
1592
case 4:
1593
for (int w=0; w < width; w++)
1594
out[w] = in[4*w ] >= lower[0] && in[4*w ] <= upper[0] &&
1595
in[4*w+1] >= lower[1] && in[4*w+1] <= upper[1] &&
1596
in[4*w+2] >= lower[2] && in[4*w+2] <= upper[2] &&
1597
in[4*w+3] >= lower[3] && in[4*w+3] <= upper[3] ? 255: 0;
1598
break;
1599
default: CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
1600
}
1601
}
1602
1603
// inRange(): per-pixel range test on integral images; output is an 8U mask.
// Note run_inrange takes (upperb, lowerb) — order swapped vs. this signature.
GAPI_FLUID_KERNEL(GFluidInRange, cv::gapi::core::GInRange, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, const cv::Scalar &lowerb, const cv::Scalar& upperb,
                    Buffer &dst)
    {
        //       DST    SRC     OP           __VA_ARGS__
        INRANGE_(uchar, uchar , run_inrange, dst, src, upperb, lowerb);
        INRANGE_(uchar, ushort, run_inrange, dst, src, upperb, lowerb);
        INRANGE_(uchar,  short, run_inrange, dst, src, upperb, lowerb);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1618
1619
//----------------------
1620
//
1621
// Fluid kernels: select
1622
//
1623
//----------------------
1624
1625
// manually vectored function for important case if RGB/BGR image
1626
// Manually vectorized select for 3-channel 8-bit rows: for each pixel w,
// copy all three channels from in1 when mask in3[w] is non-zero, else from in2.
static void run_select_row3(int width, uchar out[], uchar in1[], uchar in2[], uchar in3[])
{
    int w = 0; // cycle index

#if CV_SIMD128
    // 16 pixels per iteration: de-interleave both sources, build a byte mask
    // from in3 (non-zero -> all ones), then blend channel-wise.
    for (; w <= width-16; w+=16)
    {
        v_uint8x16 a1, b1, c1;
        v_uint8x16 a2, b2, c2;
        v_uint8x16 mask;
        v_uint8x16 a, b, c;

        v_load_deinterleave(&in1[3*w], a1, b1, c1);
        v_load_deinterleave(&in2[3*w], a2, b2, c2);

        mask = v_load(&in3[w]);
        mask = mask != v_setzero_u8();

        a = v_select(mask, a1, a2);
        b = v_select(mask, b1, b2);
        c = v_select(mask, c1, c2);

        v_store_interleave(&out[3*w], a, b, c);
    }
#endif

    // scalar tail (or the whole row when SIMD is disabled)
    for (; w < width; w++)
    {
        out[3*w    ] = in3[w]? in1[3*w    ]: in2[3*w    ];
        out[3*w + 1] = in3[w]? in1[3*w + 1]: in2[3*w + 1];
        out[3*w + 2] = in3[w]? in1[3*w + 2]: in2[3*w + 2];
    }
}
1659
1660
// parameter chan is compile-time known constant, normally chan=1..4
1661
template<int chan, typename DST, typename SRC1, typename SRC2, typename SRC3>
1662
static void run_select_row(int width, DST out[], SRC1 in1[], SRC2 in2[], SRC3 in3[])
1663
{
1664
if (std::is_same<DST,uchar>::value && chan==3)
1665
{
1666
// manually vectored function for important case if RGB/BGR image
1667
run_select_row3(width, (uchar*)out, (uchar*)in1, (uchar*)in2, (uchar*)in3);
1668
return;
1669
}
1670
1671
// because `chan` is template parameter, its value is known at compilation time,
1672
// so that modern compilers would efficiently vectorize this cycle if chan==1
1673
// (if chan>1, compilers may need help with de-interleaving of the channels)
1674
for (int w=0; w < width; w++)
1675
{
1676
for (int c=0; c < chan; c++)
1677
{
1678
out[w*chan + c] = in3[w]? in1[w*chan + c]: in2[w*chan + c];
1679
}
1680
}
1681
}
1682
1683
// select(): dst pixel = src1 pixel where mask src3 is non-zero, else src2.
// src1/src2/dst share a type; the mask is 8U single-value per pixel.
// Dispatches to a channel-templated row routine for better codegen.
template<typename DST, typename SRC1, typename SRC2, typename SRC3>
static void run_select(Buffer &dst, const View &src1, const View &src2, const View &src3)
{
    static_assert(std::is_same<DST , SRC1>::value, "wrong types");
    static_assert(std::is_same<DST , SRC2>::value, "wrong types");
    static_assert(std::is_same<uchar, SRC3>::value, "wrong types");

    auto *out = dst.OutLine<DST>();

    const auto *in1 = src1.InLine<SRC1>(0);
    const auto *in2 = src2.InLine<SRC2>(0);
    const auto *in3 = src3.InLine<SRC3>(0);

    int width = dst.length();
    int chan  = dst.meta().chan;

    switch (chan)
    {
    case 1: run_select_row<1>(width, out, in1, in2, in3); break;
    case 2: run_select_row<2>(width, out, in1, in2, in3); break;
    case 3: run_select_row<3>(width, out, in1, in2, in3); break;
    case 4: run_select_row<4>(width, out, in1, in2, in3); break;
    default: CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
    }
}
1708
1709
// select(): mask-driven blend of two images; dispatch via the SELECT_ macro.
GAPI_FLUID_KERNEL(GFluidSelect, cv::gapi::core::GSelect, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)
    {
        //      DST     SRC1    SRC2    SRC3   OP          __VA_ARGS__
        SELECT_(uchar , uchar , uchar , uchar, run_select, dst, src1, src2, src3);
        SELECT_(ushort, ushort, ushort, uchar, run_select, dst, src1, src2, src3);
        SELECT_( short,  short,  short, uchar, run_select, dst, src1, src2, src3);

        CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
    }
};
1723
1724
//----------------------------------------------------
1725
//
1726
// Fluid kernels: split, merge, polat2cart, cart2polar
1727
//
1728
//----------------------------------------------------
1729
1730
// split(): de-interleave an 8UC3 row into three single-channel 8U rows.
GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
    {
        const auto *in   = src.InLine<uchar>(0);
              auto *out1 = dst1.OutLine<uchar>();
              auto *out2 = dst2.OutLine<uchar>();
              auto *out3 = dst3.OutLine<uchar>();

        GAPI_Assert(3 == src.meta().chan);
        int width = src.length();

        int w = 0; // cycle counter

    #if CV_SIMD128
        // 16 pixels per iteration via vector de-interleave
        for (; w <= width-16; w+=16)
        {
            v_uint8x16 a, b, c;
            v_load_deinterleave(&in[3*w], a, b, c);
            v_store(&out1[w], a);
            v_store(&out2[w], b);
            v_store(&out3[w], c);
        }
    #endif

        // scalar tail (or whole row when SIMD is disabled)
        for (; w < width; w++)
        {
            out1[w] = in[3*w    ];
            out2[w] = in[3*w + 1];
            out3[w] = in[3*w + 2];
        }
    }
};
1765
1766
// split(): de-interleave an 8UC4 row into four single-channel 8U rows.
GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)
    {
        const auto *in   = src.InLine<uchar>(0);
              auto *out1 = dst1.OutLine<uchar>();
              auto *out2 = dst2.OutLine<uchar>();
              auto *out3 = dst3.OutLine<uchar>();
              auto *out4 = dst4.OutLine<uchar>();

        GAPI_Assert(4 == src.meta().chan);
        int width = src.length();

        int w = 0; // cycle counter

    #if CV_SIMD128
        // 16 pixels per iteration via vector de-interleave
        for (; w <= width-16; w+=16)
        {
            v_uint8x16 a, b, c, d;
            v_load_deinterleave(&in[4*w], a, b, c, d);
            v_store(&out1[w], a);
            v_store(&out2[w], b);
            v_store(&out3[w], c);
            v_store(&out4[w], d);
        }
    #endif

        // scalar tail (or whole row when SIMD is disabled)
        for (; w < width; w++)
        {
            out1[w] = in[4*w    ];
            out2[w] = in[4*w + 1];
            out3[w] = in[4*w + 2];
            out4[w] = in[4*w + 3];
        }
    }
};
1804
1805
// merge(): interleave three single-channel 8U rows into one 8UC3 row.
GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)
    {
        const auto *in1 = src1.InLine<uchar>(0);
        const auto *in2 = src2.InLine<uchar>(0);
        const auto *in3 = src3.InLine<uchar>(0);
              auto *out = dst.OutLine<uchar>();

        GAPI_Assert(3 == dst.meta().chan);
        int width = dst.length();

        int w = 0; // cycle counter

    #if CV_SIMD128
        // 16 pixels per iteration via vector interleaved store
        for (; w <= width-16; w+=16)
        {
            v_uint8x16 a, b, c;
            a = v_load(&in1[w]);
            b = v_load(&in2[w]);
            c = v_load(&in3[w]);
            v_store_interleave(&out[3*w], a, b, c);
        }
    #endif

        // scalar tail (or whole row when SIMD is disabled)
        for (; w < width; w++)
        {
            out[3*w    ] = in1[w];
            out[3*w + 1] = in2[w];
            out[3*w + 2] = in3[w];
        }
    }
};
1840
1841
// merge(): interleave four single-channel 8U rows into one 8UC4 row.
GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, const View &src3, const View &src4,
                    Buffer &dst)
    {
        const auto *in1 = src1.InLine<uchar>(0);
        const auto *in2 = src2.InLine<uchar>(0);
        const auto *in3 = src3.InLine<uchar>(0);
        const auto *in4 = src4.InLine<uchar>(0);
              auto *out = dst.OutLine<uchar>();

        GAPI_Assert(4 == dst.meta().chan);
        int width = dst.length();

        int w = 0; // cycle counter

    #if CV_SIMD128
        // 16 pixels per iteration via vector interleaved store
        for (; w <= width-16; w+=16)
        {
            v_uint8x16 a, b, c, d;
            a = v_load(&in1[w]);
            b = v_load(&in2[w]);
            c = v_load(&in3[w]);
            d = v_load(&in4[w]);
            v_store_interleave(&out[4*w], a, b, c, d);
        }
    #endif

        // scalar tail (or whole row when SIMD is disabled)
        for (; w < width; w++)
        {
            out[4*w    ] = in1[w];
            out[4*w + 1] = in2[w];
            out[4*w + 2] = in3[w];
            out[4*w + 3] = in4[w];
        }
    }
};
1880
1881
// polarToCart(): (magnitude, angle) -> (x, y), all four planes CV_32F.
// When angleInDegrees is set, angles are converted to radians first.
GAPI_FLUID_KERNEL(GFluidPolarToCart, cv::gapi::core::GPolarToCart, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, bool angleInDegrees,
                    Buffer &dst1, Buffer &dst2)
    {
        GAPI_Assert(src1.meta().depth == CV_32F);
        GAPI_Assert(src2.meta().depth == CV_32F);
        GAPI_Assert(dst1.meta().depth == CV_32F);
        GAPI_Assert(dst2.meta().depth == CV_32F);

        const auto * in1 = src1.InLine<float>(0);
        const auto * in2 = src2.InLine<float>(0);
        auto *out1 = dst1.OutLine<float>();
        auto *out2 = dst2.OutLine<float>();

        int width = src1.length();
        // NOTE(review): width taken from src1 but chan from src2 — same-shaped
        // inputs are assumed; confirm upstream meta checks guarantee this.
        int chan = src2.meta().chan;
        int length = width * chan;

        // SIMD: compiler vectoring!
        for (int l=0; l < length; l++)
        {
            float angle = angleInDegrees?
                          in2[l] * static_cast<float>(CV_PI / 180):
                          in2[l];
            float magnitude = in1[l];
            float x = magnitude * std::cos(angle);
            float y = magnitude * std::sin(angle);
            out1[l] = x;
            out2[l] = y;
        }
    }
};
1916
1917
// cartToPolar(): (x, y) -> (magnitude, angle), all four planes CV_32F.
// Angle comes from atan2 (radians), optionally converted to degrees.
GAPI_FLUID_KERNEL(GFluidCartToPolar, cv::gapi::core::GCartToPolar, false)
{
    static const int Window = 1; // point-wise kernel

    static void run(const View &src1, const View &src2, bool angleInDegrees,
                    Buffer &dst1, Buffer &dst2)
    {
        GAPI_Assert(src1.meta().depth == CV_32F);
        GAPI_Assert(src2.meta().depth == CV_32F);
        GAPI_Assert(dst1.meta().depth == CV_32F);
        GAPI_Assert(dst2.meta().depth == CV_32F);

        const auto * in1 = src1.InLine<float>(0);
        const auto * in2 = src2.InLine<float>(0);
        auto *out1 = dst1.OutLine<float>();
        auto *out2 = dst2.OutLine<float>();

        int width = src1.length();
        // NOTE(review): width taken from src1 but chan from src2 — same-shaped
        // inputs are assumed; confirm upstream meta checks guarantee this.
        int chan = src2.meta().chan;
        int length = width * chan;

        // SIMD: compiler vectoring!
        for (int l=0; l < length; l++)
        {
            float x = in1[l];
            float y = in2[l];
            float magnitude = std::hypot(y, x);
            float angle_rad = std::atan2(y, x);
            float angle = angleInDegrees?
                          angle_rad * static_cast<float>(180 / CV_PI):
                          angle_rad;
            out1[l] = magnitude;
            out2[l] = angle;
        }
    }
};
1953
1954
GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::core::GResize, true)
{
    static const int Window = 1;
    static const auto Kind = GFluidKernel::Kind::Resize;

    // Bilinear interpolation coefficients are kept as Q11 fixed-point:
    // 1.0 maps to 1 << INTER_RESIZE_COEF_BITS.
    constexpr static const int INTER_RESIZE_COEF_BITS = 11;
    constexpr static const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS;
    constexpr static const short ONE = INTER_RESIZE_COEF_SCALE;

    // Precomputed mapping of one output coordinate onto two source
    // coordinates (s0, s1) and their fixed-point blend weights.
    struct ResizeUnit
    {
        short alpha0;
        short alpha1;
        int   s0;
        int   s1;
    };

    // Maps output coordinate outCoord to source coordinates, clamped to
    // [0, max); `start` offsets indices into the currently visible view.
    static ResizeUnit map(double ratio, int start, int max, int outCoord)
    {
        float f = static_cast<float>((outCoord + 0.5f) * ratio - 0.5f);
        int s = cvFloor(f);
        f -= s;

        ResizeUnit ru;

        ru.s0 = std::max(s - start, 0);
        // When the weight is exactly 0 (or we'd read past the border),
        // read the same source pixel twice instead of the neighbor.
        ru.s1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;

        ru.alpha0 = saturate_cast<short>((1.0f - f) * INTER_RESIZE_COEF_SCALE);
        ru.alpha1 = saturate_cast<short>((f) * INTER_RESIZE_COEF_SCALE);

        return ru;
    }

    // Precomputes the horizontal mapping table (one ResizeUnit per output
    // column) into the scratch buffer. Only CV_8UC3 input is supported.
    static void initScratch(const cv::GMatDesc& in,
                            cv::Size outSz, double /*fx*/, double /*fy*/, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch)
    {
        CV_Assert(in.depth == CV_8U && in.chan == 3);

        // Scratch is a raw byte row large enough to hold the ResizeUnit table.
        cv::Size scratch_size{static_cast<int>(outSz.width * sizeof(ResizeUnit)), 1};

        cv::GMatDesc desc;
        desc.chan  = 1;
        desc.depth = CV_8UC1;
        desc.size  = to_own(scratch_size);

        cv::gapi::fluid::Buffer buffer(desc);
        scratch = std::move(buffer);

        ResizeUnit* mapX = scratch.OutLine<ResizeUnit>();
        double hRatio = (double)in.size.width / outSz.width;

        for (int x = 0, w = outSz.width; x < w; x++)
        {
            mapX[x] = map(hRatio, 0, in.size.width, x);
        }
    }

    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/)
    {}

    // Produces one output row: blends two source rows (vertical weights
    // beta0/beta1) and, per column, two source pixels (alpha0/alpha1)
    // using the precomputed horizontal map for a 3-channel 8-bit image.
    static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, double /*fy*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch)
    {
        double vRatio = (double)in.meta().size.height / out.meta().size.height;
        auto mapY = map(vRatio, in.y(), in.meta().size.height, out.y());

        auto beta0 = mapY.alpha0;
        auto beta1 = mapY.alpha1;

        const auto src0 = in.InLine <unsigned char>(mapY.s0);
        const auto src1 = in.InLine <unsigned char>(mapY.s1);

        auto dst = out.OutLine<unsigned char>();

        ResizeUnit* mapX = scratch.OutLine<ResizeUnit>();

        for (int x = 0; x < out.length(); x++)
        {
            short alpha0 = mapX[x].alpha0;
            short alpha1 = mapX[x].alpha1;
            int   sx0    = mapX[x].s0;
            int   sx1    = mapX[x].s1;

            // Horizontal blend per channel (results are Q11-scaled ints)
            int res00 = src0[3*sx0    ]*alpha0 + src0[3*(sx1)    ]*alpha1;
            int res10 = src1[3*sx0    ]*alpha0 + src1[3*(sx1)    ]*alpha1;

            int res01 = src0[3*sx0 + 1]*alpha0 + src0[3*(sx1) + 1]*alpha1;
            int res11 = src1[3*sx0 + 1]*alpha0 + src1[3*(sx1) + 1]*alpha1;

            int res02 = src0[3*sx0 + 2]*alpha0 + src0[3*(sx1) + 2]*alpha1;
            int res12 = src1[3*sx0 + 2]*alpha0 + src1[3*(sx1) + 2]*alpha1;

            // Vertical blend with rounding; shifts fold the two Q11 scales
            // (>>4, >>16, +2, >>2 together divide by 2^22 with rounding).
            dst[3*x    ] = uchar(( ((beta0 * (res00 >> 4)) >> 16) + ((beta1 * (res10 >> 4)) >> 16) + 2)>>2);
            dst[3*x + 1] = uchar(( ((beta0 * (res01 >> 4)) >> 16) + ((beta1 * (res11 >> 4)) >> 16) + 2)>>2);
            dst[3*x + 2] = uchar(( ((beta0 * (res02 >> 4)) >> 16) + ((beta1 * (res12 >> 4)) >> 16) + 2)>>2);
        }
    }
};
} // namespace fluid
} // namespace gapi
} // namespace cv
// Returns the kernel package with all core operations implemented
// by the Fluid backend.
cv::gapi::GKernelPackage cv::gapi::core::fluid::kernels()
{
    using namespace cv::gapi::fluid;

    return cv::gapi::kernels
     <       GFluidAdd
            ,GFluidSub
            ,GFluidMul
            ,GFluidDiv
            ,GFluidAbsDiff
            ,GFluidAnd
            ,GFluidOr
            ,GFluidXor
            ,GFluidMin
            ,GFluidMax
            ,GFluidCmpGT
            ,GFluidCmpGE
            ,GFluidCmpLE
            ,GFluidCmpLT
            ,GFluidCmpEQ
            ,GFluidCmpNE
            ,GFluidAddW
            ,GFluidNot
            ,GFluidLUT
            ,GFluidConvertTo
            ,GFluidSplit3
            ,GFluidSplit4
            ,GFluidMerge3
            ,GFluidMerge4
            ,GFluidSelect
            ,GFluidPolarToCart
            ,GFluidCartToPolar
            ,GFluidAddC
            ,GFluidSubC
            ,GFluidSubRC
            ,GFluidMulC
            ,GFluidMulCOld
            ,GFluidDivC
            ,GFluidDivRC
            ,GFluidAbsDiffC
            ,GFluidCmpGTScalar
            ,GFluidCmpGEScalar
            ,GFluidCmpLEScalar
            ,GFluidCmpLTScalar
            ,GFluidCmpEQScalar
            ,GFluidCmpNEScalar
            ,GFluidThreshold
            ,GFluidInRange
            ,GFluidResize
        #if 0
            ,GFluidMean        -- not fluid
            ,GFluidSum         -- not fluid
            ,GFluidNormL1      -- not fluid
            ,GFluidNormL2      -- not fluid
            ,GFluidNormInf     -- not fluid
            ,GFluidIntegral    -- not fluid
            ,GFluidThresholdOT -- not fluid
            ,GFluidResize      -- not fluid (?)
            ,GFluidRemap       -- not fluid
            ,GFluidFlip        -- not fluid
            ,GFluidCrop        -- not fluid
            ,GFluidConcatHor
            ,GFluidConcatVert  -- not fluid
        #endif
     >();
}
#endif // !defined(GAPI_STANDALONE)
2127
2128