// NOTE: scraped page header removed. Source file: OpenCV DNN module,
// modules/dnn/src/layers/elementwise_layers.cpp (mirror: Tetragramm/opencv).
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Copyright (C) 2017, Intel Corporation, all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
22
//
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
26
//
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
#include "../precomp.hpp"
44
#include "layers_common.hpp"
45
#include "../op_halide.hpp"
46
#include "../op_inf_engine.hpp"
47
#include "../op_vkcom.hpp"
48
#include "opencv2/imgproc.hpp"
49
#include <opencv2/dnn/shape_utils.hpp>
50
#include <iostream>
51
52
#ifdef HAVE_OPENCL
53
#include "opencl_kernels_dnn.hpp"
54
#endif
55
56
namespace cv
57
{
58
namespace dnn
59
{
60
61
using std::abs;
62
using std::exp;
63
using std::tanh;
64
using std::pow;
65
66
template<typename Func>
67
class ElementWiseLayer : public Func::Layer
68
{
69
public:
70
class PBody : public cv::ParallelLoopBody
71
{
72
public:
73
const Func* func_;
74
const Mat* src_;
75
Mat* dst_;
76
int nstripes_;
77
78
PBody(const Func &func, const Mat &src, Mat& dst, int nstripes)
79
{
80
func_ = &func;
81
src_ = &src;
82
dst_ = &dst;
83
nstripes_ = nstripes;
84
}
85
86
void operator()(const Range &r) const CV_OVERRIDE
87
{
88
int nstripes = nstripes_, nsamples = 1, outCn = 1;
89
size_t planeSize = 1;
90
91
if (src_->dims > 1)
92
{
93
nsamples = src_->size[0];
94
outCn = src_->size[1];
95
}
96
else
97
outCn = src_->size[0];
98
99
for (int i = 2; i < src_->dims; ++i)
100
planeSize *= src_->size[i];
101
102
size_t stripeSize = (planeSize + nstripes - 1)/nstripes;
103
size_t stripeStart = r.start*stripeSize;
104
size_t stripeEnd = std::min(r.end*stripeSize, planeSize);
105
106
for( int i = 0; i < nsamples; i++ )
107
{
108
const float* srcptr = src_->ptr<float>(i) + stripeStart;
109
float* dstptr = dst_->ptr<float>(i) + stripeStart;
110
func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
111
}
112
}
113
};
114
115
ElementWiseLayer(const Func &f=Func()) : run_parallel(false) { func = f; }
116
117
virtual bool supportBackend(int backendId) CV_OVERRIDE
118
{
119
return func.supportBackend(backendId, this->preferableTarget);
120
}
121
122
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
123
{
124
switch (node->backendId)
125
{
126
case DNN_BACKEND_HALIDE:
127
{
128
#ifdef HAVE_HALIDE
129
auto base = node.dynamicCast<HalideBackendNode>();
130
Halide::Func& input = base->funcs.back();
131
Halide::Var x("x"), y("y"), c("c"), n("n");
132
Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));
133
func.attachHalide(input(x, y, c, n), top);
134
return Ptr<BackendNode>(new HalideBackendNode(base, top));
135
#endif // HAVE_HALIDE
136
break;
137
}
138
}
139
return Ptr<BackendNode>();
140
}
141
142
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
143
{
144
#ifdef HAVE_HALIDE
145
Halide::Buffer<float> input = halideBuffer(inputs[0]);
146
Halide::Var x("x"), y("y"), c("c"), n("n");
147
Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));
148
func.attachHalide(input(x, y, c, n), top);
149
return Ptr<BackendNode>(new HalideBackendNode(top));
150
#endif // HAVE_HALIDE
151
return Ptr<BackendNode>();
152
}
153
154
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
155
{
156
#ifdef HAVE_INF_ENGINE
157
InferenceEngine::LayerParams lp;
158
lp.name = this->name;
159
lp.precision = InferenceEngine::Precision::FP32;
160
return Ptr<BackendNode>(new InfEngineBackendNode(func.initInfEngine(lp)));
161
#endif // HAVE_INF_ENGINE
162
return Ptr<BackendNode>();
163
}
164
165
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
166
{
167
#ifdef HAVE_VULKAN
168
return Ptr<BackendNode>(new VkComBackendNode(inputs, func.initVkCom()));
169
#endif // HAVE_VULKAN
170
return Ptr<BackendNode>();
171
}
172
173
virtual bool tryFuse(Ptr<dnn::Layer>& top) CV_OVERRIDE
174
{
175
return func.tryFuse(top);
176
}
177
178
void getScaleShift(Mat& scale_, Mat& shift_) const CV_OVERRIDE
179
{
180
func.getScaleShift(scale_, shift_);
181
}
182
183
bool getMemoryShapes(const std::vector<MatShape> &inputs,
184
const int requiredOutputs,
185
std::vector<MatShape> &outputs,
186
std::vector<MatShape> &internals) const CV_OVERRIDE
187
{
188
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
189
return true;
190
}
191
192
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
193
{
194
CV_TRACE_FUNCTION();
195
196
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
197
func.applyOCL(inputs_arr, outputs_arr, internals_arr))
198
199
if (inputs_arr.depth() == CV_16S)
200
{
201
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
202
return;
203
}
204
205
std::vector<Mat> inputs, outputs;
206
inputs_arr.getMatVector(inputs);
207
outputs_arr.getMatVector(outputs);
208
209
for (size_t i = 0; i < inputs.size(); i++)
210
{
211
const Mat &src = inputs[i];
212
Mat &dst = outputs[i];
213
CV_Assert(src.size == dst.size && src.type() == dst.type() &&
214
src.isContinuous() && dst.isContinuous() && src.type() == CV_32F);
215
216
const int nstripes = getNumThreads();
217
PBody body(func, src, dst, nstripes);
218
parallel_for_(Range(0, nstripes), body, nstripes);
219
}
220
}
221
222
void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
223
{
224
func.apply(src, dst, len, planeSize, cn0, cn1);
225
}
226
227
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
228
const std::vector<MatShape> &outputs) const CV_OVERRIDE
229
{
230
long flops = 0;
231
for (int i = 0; i < outputs.size(); i++)
232
{
233
flops += total(outputs[i]) * func.getFLOPSPerElement();
234
}
235
return flops;
236
}
237
238
Func func;
239
bool run_parallel;
240
};
241
242
#ifdef HAVE_OPENCL
// Builds the "-DT=... -Dconvert_T=..." build options that select the element
// type inside the activation OpenCL kernels.
static String oclGetTMacro(const UMat &m)
{
    String typeName = ocl::typeToStr(m.type());

    // FP16 data is stored as 16-bit shorts; the kernels expect the "half" type.
    if (typeName == "short")
        typeName = "half";

    return format("-DT=%s -Dconvert_T=convert_%s ", typeName.c_str(), typeName.c_str());
}
#endif
253
254
// ReLU / leaky-ReLU activation: f(x) = x for x >= 0, slope*x otherwise.
struct ReLUFunctor
{
    typedef ReLULayer Layer;
    float slope;   // coefficient for negative inputs; 0 selects plain ReLU paths below

    explicit ReLUFunctor(float slope_=1.f) : slope(slope_) {}

    // Reports which DNN backends can run this activation on the given target.
    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE ||
               backendId == DNN_BACKEND_VKCOM;
    }

    // Applies the activation to channels [cn0, cn1); each channel holds len
    // elements, consecutive channels are planeSize floats apart.
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        float s = slope;
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            int i = 0;
#if CV_SIMD128
            // Vectorized main loop: 16 floats (4 SIMD registers) per iteration.
            v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
            for( ; i <= len - 16; i += 16 )
            {
                v_float32x4 x0 = v_load(srcptr + i);
                v_float32x4 x1 = v_load(srcptr + i + 4);
                v_float32x4 x2 = v_load(srcptr + i + 8);
                v_float32x4 x3 = v_load(srcptr + i + 12);
                x0 = v_select(x0 >= z, x0, x0*s4);
                x1 = v_select(x1 >= z, x1, x1*s4);
                x2 = v_select(x2 >= z, x2, x2*s4);
                x3 = v_select(x3 >= z, x3, x3*s4);
                v_store(dstptr + i, x0);
                v_store(dstptr + i + 4, x1);
                v_store(dstptr + i + 8, x2);
                v_store(dstptr + i + 12, x3);
            }
#endif
            // Scalar tail for the remaining (< 16) elements.
            for( ; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = x >= 0.f ? x : s*x;
            }
        }
    }

#ifdef HAVE_OPENCL
    // Creates the ReLU OpenCL kernel; slope == 0 selects a specialized
    // no-slope kernel variant (RELU_NO_SLOPE) without the extra argument.
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
        String buildopt = oclGetTMacro(src) + buildoptSlope;

        if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
            return false;

        if (slope != 0)
            ker.set(3, (float)slope);

        return true;
    }

    // Runs the activation on OpenCL UMats; one 1-D kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];
            CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);

            ocl::Kernel kernel;
            CV_Assert(initKernel(kernel, src));
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    // Expresses the activation as a Halide pipeline stage.
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        if (slope)
        {
            top(x, y, c, n) = select(input >= 0.0f, input, slope * input);
        }
        else
        {
            top(x, y, c, n) = max(input, 0.0f);
        }
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // Maps to Inference Engine's ReLU layer, forwarding the negative slope.
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        lp.type = "ReLU";
        std::shared_ptr<InferenceEngine::ReLULayer> ieLayer(new InferenceEngine::ReLULayer(lp));
        ieLayer->negative_slope = slope;
        ieLayer->params["negative_slope"] = format("%f", slope);
        return ieLayer;
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    // Creates the Vulkan compute op implementing ReLU with this slope.
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpReLU(slope));
        return op;
    }
#endif  // HAVE_VULKAN

    // ReLU does not fuse with a following layer and exposes no scale/shift.
    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 1; }
};
385
386
// Clipped ReLU ("ReLU6"): f(x) = clamp(x, minValue, maxValue), default [0, 6].
struct ReLU6Functor
{
    typedef ReLU6Layer Layer;
    float minValue, maxValue;   // clamp bounds, minValue <= maxValue

    ReLU6Functor(float minValue_ = 0.0f, float maxValue_ = 6.0f)
        : minValue(minValue_), maxValue(maxValue_)
    {
        CV_Assert(minValue <= maxValue);
    }

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE;
    }

    // Applies the clamp to channels [cn0, cn1); each channel holds len
    // elements, consecutive channels are planeSize floats apart.
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            int i = 0;
#if CV_SIMD128
            // Vectorized main loop: 16 floats per iteration, clamp via min/max.
            v_float32x4 minV = v_setall_f32(minValue), maxV = v_setall_f32(maxValue);
            for( ; i <= len - 16; i += 16 )
            {
                v_float32x4 x0 = v_load(srcptr + i);
                v_float32x4 x1 = v_load(srcptr + i + 4);
                v_float32x4 x2 = v_load(srcptr + i + 8);
                v_float32x4 x3 = v_load(srcptr + i + 12);
                x0 = v_min(v_max(minV, x0), maxV);
                x1 = v_min(v_max(minV, x1), maxV);
                x2 = v_min(v_max(minV, x2), maxV);
                x3 = v_min(v_max(minV, x3), maxV);
                v_store(dstptr + i, x0);
                v_store(dstptr + i + 4, x1);
                v_store(dstptr + i + 8, x2);
                v_store(dstptr + i + 12, x3);
            }
#endif
            // Scalar tail.
            for( ; i < len; i++ )
            {
                float x = srcptr[i];
                if (x >= minValue)
                    dstptr[i] = x <= maxValue ? x : maxValue;
                else
                    dstptr[i] = minValue;
            }
        }
    }

#ifdef HAVE_OPENCL
    // Runs the clamp on OpenCL UMats; one kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
            kernel.set(3, (float)minValue);
            kernel.set(4, (float)maxValue);

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = clamp(input, minValue, maxValue);
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // Maps to Inference Engine's Clamp layer with the same bounds.
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        lp.type = "Clamp";
        std::shared_ptr<InferenceEngine::ClampLayer> ieLayer(new InferenceEngine::ClampLayer(lp));
        ieLayer->min_value = minValue;
        ieLayer->max_value = maxValue;
        ieLayer->params["min"] = format("%f", minValue);
        ieLayer->params["max"] = format("%f", maxValue);
        return ieLayer;
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif  // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 2; }
};
502
503
// Hyperbolic tangent activation: f(x) = tanh(x).
struct TanHFunctor
{
    typedef TanHLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE;
    }

    // Applies tanh element-wise to channels [cn0, cn1); consecutive channels
    // are planeSize floats apart. Scalar-only (no SIMD path).
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = tanh(x);
            }
        }
    }

#ifdef HAVE_OPENCL
    // Runs tanh on OpenCL UMats; one kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = tanh(input);
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // Maps to Inference Engine's parameterless TanH layer.
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        lp.type = "TanH";
        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
        return ieLayer;
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif  // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 1; }
};
584
585
// Logistic sigmoid activation: f(x) = 1 / (1 + exp(-x)).
struct SigmoidFunctor
{
    typedef SigmoidLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE;
    }

    // Applies sigmoid element-wise to channels [cn0, cn1); consecutive
    // channels are planeSize floats apart. Scalar-only (no SIMD path).
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = 1.f/(1.f + exp(-x));
            }
        }
    }

#ifdef HAVE_OPENCL
    // Runs sigmoid on OpenCL UMats; one kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("SigmoidForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = 1.0f / (1.0f + exp(-input));
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // Maps to Inference Engine's parameterless Sigmoid layer.
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        lp.type = "Sigmoid";
        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
        return ieLayer;
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif  // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 3; }
};
666
667
// Exponential Linear Unit: f(x) = x for x >= 0, exp(x) - 1 otherwise
// (alpha fixed at 1).
struct ELUFunctor
{
    typedef ELULayer Layer;

    explicit ELUFunctor() {}

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE;
    }

    // Applies ELU element-wise to channels [cn0, cn1); consecutive channels
    // are planeSize floats apart. Scalar-only (no SIMD path).
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for(int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = x >= 0.f ? x : exp(x) - 1;
            }
        }
    }

#ifdef HAVE_OPENCL
    // Runs ELU on OpenCL UMats; one kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("ELUForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = select(input >= 0.0f, input, exp(input) - 1);
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // Maps to Inference Engine's parameterless ELU layer.
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        lp.type = "ELU";
        return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif  // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 2; }
};
749
750
// Absolute value activation: f(x) = |x|.
struct AbsValFunctor
{
    typedef AbsLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
    }

    // Applies |x| element-wise to channels [cn0, cn1); consecutive channels
    // are planeSize floats apart. Scalar-only (no SIMD path).
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = abs(x);
            }
        }
    }

#ifdef HAVE_OPENCL
    // Runs |x| on OpenCL UMats; one kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("AbsValForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = abs(input);
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // No Inference Engine mapping exists for Abs; always raises.
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        CV_Error(Error::StsNotImplemented, "Abs");
        return InferenceEngine::CNNLayerPtr();
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif  // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 1; }
};
829
830
struct BNLLFunctor
831
{
832
typedef BNLLLayer Layer;
833
834
bool supportBackend(int backendId, int)
835
{
836
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
837
}
838
839
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
840
{
841
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
842
{
843
for( int i = 0; i < len; i++ )
844
{
845
float x = srcptr[i];
846
dstptr[i] = log(1.f + exp(-abs(x)));
847
}
848
}
849
}
850
851
#ifdef HAVE_OPENCL
852
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
853
{
854
// TODO: implement OCL version
855
return false;
856
}
857
#endif
858
859
#ifdef HAVE_HALIDE
860
void attachHalide(const Halide::Expr& input, Halide::Func& top)
861
{
862
Halide::Var x("x"), y("y"), c("c"), n("n");
863
top(x, y, c, n) = log(1.0f + exp(-abs(input)));
864
}
865
#endif // HAVE_HALIDE
866
867
#ifdef HAVE_INF_ENGINE
868
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
869
{
870
CV_Error(Error::StsNotImplemented, "BNLL");
871
return InferenceEngine::CNNLayerPtr();
872
}
873
#endif // HAVE_INF_ENGINE
874
875
#ifdef HAVE_VULKAN
876
std::shared_ptr<vkcom::OpBase> initVkCom()
877
{
878
// TODO: add vkcom implementation
879
return std::shared_ptr<vkcom::OpBase>();
880
}
881
#endif // HAVE_VULKAN
882
883
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
884
885
void getScaleShift(Mat&, Mat&) const {}
886
887
int64 getFLOPSPerElement() const { return 5; }
888
};
889
890
// Power activation: f(x) = (scale*x + shift)^power.
struct PowerFunctor
{
    typedef PowerLayer Layer;

    float power;
    float scale;
    float shift;

    explicit PowerFunctor(float power_ = 1.f, float scale_ = 1.f, float shift_ = 0.f)
        : power(power_), scale(scale_), shift(shift_) {}

    bool supportBackend(int backendId, int targetId)
    {
        // IE OpenCL targets only handle the linear (power == 1) case here.
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
            return (targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) || power == 1.0;
        else
            return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
    }

    // Applies the transform to channels [cn0, cn1); the power == 1 case is
    // specialized to avoid calling pow() per element.
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        float a = scale, b = shift, p = power;
        if( p == 1.f )
        {
            for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
            {
                for( int i = 0; i < len; i++ )
                {
                    float x = srcptr[i];
                    dstptr[i] = a*x + b;
                }
            }
        }
        else
        {
            for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
            {
                for( int i = 0; i < len; i++ )
                {
                    float x = srcptr[i];
                    dstptr[i] = pow(a*x + b, p);
                }
            }
        }
    }

#ifdef HAVE_OPENCL
    // Runs the transform on OpenCL UMats; one kernel launch per blob.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("PowForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
            kernel.set(3, (float)power);
            kernel.set(4, (float)scale);
            kernel.set(5, (float)shift);

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_HALIDE
    // Builds the expression incrementally, skipping identity scale/shift/power.
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Expr topExpr = (scale == 1.0f ? input : input * scale);
        if (shift)
        {
            topExpr += shift;
        }
        if (power != 1.0f)
        {
            topExpr = pow(topExpr, power);
        }
        top(x, y, c, n) = topExpr;
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
    {
        if (power == 1.0f && scale == 1.0f && shift == 0.0f)
        {
            // It looks like there is a bug in Inference Engine for DNN_TARGET_OPENCL and DNN_TARGET_OPENCL_FP16
            // if power layer do nothing so we replace it to Identity.
            lp.type = "Split";
            return std::shared_ptr<InferenceEngine::SplitLayer>(new InferenceEngine::SplitLayer(lp));
        }
        else
        {
            lp.type = "Power";
            std::shared_ptr<InferenceEngine::PowerLayer> ieLayer(new InferenceEngine::PowerLayer(lp));
            ieLayer->power = power;
            ieLayer->scale = scale;
            ieLayer->offset = shift;
            return ieLayer;
        }
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif  // HAVE_VULKAN

    // Absorbs a following per-tensor scale/shift layer into this layer's
    // parameters. Fusion is declined when both a non-unit power and a
    // non-zero shift are present, or when the next layer's scale/shift is
    // not a single scalar.
    // NOTE(review): the update `scale = pow(scale, power) * nextScale` is only
    // exact for the cases the guard admits — confirm against the fusion tests.
    bool tryFuse(Ptr<dnn::Layer>& top)
    {
        if (power != 1.0f && shift != 0.0f)
            return false;

        Mat w, b;
        top->getScaleShift(w, b);
        if ((w.empty() && b.empty()) || w.total() > 1 || b.total() > 1)
            return false;

        float nextScale = w.empty() ? 1.0f : w.at<float>(0);
        float nextShift = b.empty() ? 0.0f : b.at<float>(0);
        scale = std::pow(scale, power) * nextScale;
        shift = nextScale * shift + nextShift;
        return true;
    }

    // Exposes the layer's own scale/shift for fusion into a preceding layer;
    // only valid when the transform is linear (power == 1).
    void getScaleShift(Mat& _scale, Mat& _shift) const
    {
        if (power == 1.0f)
        {
            _scale = Mat(1, 1, CV_32F, Scalar(scale));
            _shift = Mat(1, 1, CV_32F, Scalar(shift));
        }
    }

    int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
};
1042
1043
1044
struct ChannelsPReLUFunctor
1045
{
1046
typedef ChannelsPReLULayer Layer;
1047
Mat scale;
1048
#ifdef HAVE_OPENCL
1049
UMat scale_umat;
1050
#endif
1051
1052
explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
1053
{
1054
}
1055
1056
bool supportBackend(int backendId, int)
1057
{
1058
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
1059
}
1060
1061
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
1062
{
1063
CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
1064
1065
const float* scaleptr = scale.ptr<float>();
1066
CV_Assert( 0 <= cn0 && cn0 < cn1 && cn1 <= (int)scale.total() );
1067
1068
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
1069
{
1070
float s = scaleptr[cn];
1071
int i = 0;
1072
#if CV_SIMD128
1073
v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
1074
for( ; i <= len - 16; i += 16 )
1075
{
1076
v_float32x4 x0 = v_load(srcptr + i);
1077
v_float32x4 x1 = v_load(srcptr + i + 4);
1078
v_float32x4 x2 = v_load(srcptr + i + 8);
1079
v_float32x4 x3 = v_load(srcptr + i + 12);
1080
x0 = v_select(x0 >= z, x0, x0*s4);
1081
x1 = v_select(x1 >= z, x1, x1*s4);
1082
x2 = v_select(x2 >= z, x2, x2*s4);
1083
x3 = v_select(x3 >= z, x3, x3*s4);
1084
v_store(dstptr + i, x0);
1085
v_store(dstptr + i + 4, x1);
1086
v_store(dstptr + i + 8, x2);
1087
v_store(dstptr + i + 12, x3);
1088
}
1089
#endif
1090
for( ; i < len; i++ )
1091
{
1092
float x = srcptr[i];
1093
dstptr[i] = x >= 0.f ? x : s*x;
1094
}
1095
}
1096
}
1097
1098
#ifdef HAVE_OPENCL
    // OpenCL forward pass: launches the "PReLUForward" kernel once per input blob.
    // Returns true on success (kernel.run failures are caught by CV_Assert).
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        // Lazily upload the per-channel slopes to the device; reused on later calls.
        if (scale_umat.empty())
            scale.copyTo(scale_umat);

        std::vector<UMat> inputs, outputs;
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t idx = 0; idx < inputs.size(); idx++)
        {
            UMat& src = inputs[idx];
            UMat& dst = outputs[idx];

            // Argument order must match the PReLUForward signature in activations_oclsrc.
            ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());              // total element count
            kernel.set(1, (int)src.size[1]);              // channel count
            kernel.set(2, (int)total(shape(src), 2));     // elements per channel plane
            kernel.set(3, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(4, ocl::KernelArg::PtrWriteOnly(dst));
            kernel.set(5, ocl::KernelArg::PtrReadOnly(scale_umat));

            size_t globalSize = src.total();              // one work-item per element
            CV_Assert(kernel.run(1, &globalSize, NULL, false));
        }

        return true;
    }
#endif
1131
1132
#ifdef HAVE_HALIDE
    // Halide backend: emits out = (x >= 0) ? x : slope[c] * x, with one learned
    // slope per channel taken from the `scale` blob.
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        auto slopes = wrapToHalideBuffer(scale, {(int)scale.total()});
        top(x, y, c, n) = select(input >= 0.0f, input, slopes(c) * input);
    }
#endif  // HAVE_HALIDE
1140
1141
#ifdef HAVE_INF_ENGINE
1142
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
1143
{
1144
CV_Error(Error::StsNotImplemented, "PReLU");
1145
return InferenceEngine::CNNLayerPtr();
1146
}
1147
#endif // HAVE_INF_ENGINE
1148
1149
#ifdef HAVE_VULKAN
1150
std::shared_ptr<vkcom::OpBase> initVkCom()
1151
{
1152
// TODO: add vkcom implementation
1153
return std::shared_ptr<vkcom::OpBase>();
1154
}
1155
#endif // HAVE_VULKAN
1156
1157
    // PReLU does not support fusing a following layer into itself.
    bool tryFuse(Ptr<dnn::Layer>&) { return false; }
1158
1159
    // No affine scale/shift representation is exposed for PReLU (outputs left empty).
    void getScaleShift(Mat&, Mat&) const {}
1160
1161
    // Counted as one FLOP per element (compare + possible multiply).
    int64 getFLOPSPerElement() const { return 1; }
1162
};
1163
1164
// Generates a parameterless _Layer::create() factory wrapping _Functor in an
// ElementWiseLayer. Fix: the expansion contained a duplicated `return return`,
// which is a syntax error and would break any instantiation of this macro.
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
    return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
1167
1168
1169
// Factory for ReLU; a non-zero "negative_slope" parameter yields leaky ReLU.
Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
{
    const float slope = params.get<float>("negative_slope", 0.f);
    Ptr<ReLULayer> layer(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(slope)));
    layer->negativeSlope = slope;
    layer->setParamsFrom(params);
    return layer;
}
1178
1179
// Factory for clipped ReLU: output is clamped to [min_value, max_value],
// defaulting to the classic ReLU6 range [0, 6].
Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
{
    const float lowerBound = params.get<float>("min_value", 0.0f);
    const float upperBound = params.get<float>("max_value", 6.0f);
    Ptr<ReLU6Layer> layer(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(lowerBound, upperBound)));
    layer->minValue = lowerBound;
    layer->maxValue = upperBound;
    layer->setParamsFrom(params);
    return layer;
}
1190
1191
// Factory for the hyperbolic-tangent activation layer (no tunable parameters).
Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
{
    Ptr<TanHLayer> layer(new ElementWiseLayer<TanHFunctor>());
    layer->setParamsFrom(params);
    return layer;
}
1198
1199
// Factory for the sigmoid activation layer (no tunable parameters).
Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
{
    Ptr<SigmoidLayer> layer(new ElementWiseLayer<SigmoidFunctor>());
    layer->setParamsFrom(params);
    return layer;
}
1206
1207
// Factory for the ELU activation layer (no tunable parameters; the functor is
// default-constructed, matching the sibling parameterless factories).
Ptr<ELULayer> ELULayer::create(const LayerParams& params)
{
    Ptr<ELULayer> layer(new ElementWiseLayer<ELUFunctor>());
    layer->setParamsFrom(params);
    return layer;
}
1214
1215
// Factory for the absolute-value activation layer (no tunable parameters).
Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)
{
    Ptr<AbsLayer> layer(new ElementWiseLayer<AbsValFunctor>());
    layer->setParamsFrom(params);
    return layer;
}
1222
1223
// Factory for the BNLL (binomial normal log-likelihood) activation layer.
Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
{
    Ptr<BNLLLayer> layer(new ElementWiseLayer<BNLLFunctor>());
    layer->setParamsFrom(params);
    return layer;
}
1230
1231
// Factory for the power layer: out = (shift + scale * x) ^ power, with
// identity defaults (power = 1, scale = 1, shift = 0).
Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
{
    const float exponent = params.get<float>("power", 1.0f);
    const float mul = params.get<float>("scale", 1.0f);
    const float add = params.get<float>("shift", 0.0f);
    Ptr<PowerLayer> layer(new ElementWiseLayer<PowerFunctor>(PowerFunctor(exponent, mul, add)));
    layer->power = exponent;
    layer->scale = mul;
    layer->shift = add;
    layer->setParamsFrom(params);
    return layer;
}
1244
1245
// Factory for channel-wise PReLU. Expects exactly one blob of learned slopes;
// a single-element blob degenerates to ordinary leaky ReLU, so delegate to
// ReLULayer in that case.
Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
{
    CV_Assert(params.blobs.size() == 1);
    if (params.blobs[0].total() == 1)
    {
        // One shared slope: build the cheaper ReLU layer instead.
        LayerParams reluParams = params;
        reluParams.set("negative_slope", params.blobs[0].at<float>(0));
        return ReLULayer::create(reluParams);
    }
    Ptr<ChannelsPReLULayer> layer(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
    layer->setParamsFrom(params);
    return layer;
}
1259
1260
}
1261
}
1262
1263