Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/layers/concat_layer.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Copyright (C) 2017, Intel Corporation, all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
22
//
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
26
//
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
#include "../precomp.hpp"
44
#include "layers_common.hpp"
45
#include "../op_halide.hpp"
46
#include "../op_inf_engine.hpp"
47
#include "../op_vkcom.hpp"
48
49
#ifdef HAVE_OPENCL
50
#include "opencl_kernels_dnn.hpp"
51
#endif
52
53
namespace cv
54
{
55
namespace dnn
56
{
57
58
class ConcatLayerImpl CV_FINAL : public ConcatLayer
{
public:
    ConcatLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        // Axis along which inputs are concatenated (default 1 = channels).
        axis = params.get<int>("axis", 1);
        // When true, mismatched inputs are centered and zero-padded into the
        // element-wise-max output shape instead of being strictly shape-checked.
        padding = params.get<bool>("padding", false);
    }

    // Computes the output shape: equal to the input shapes except along the
    // concatenation axis, whose sizes are summed. With padding enabled, every
    // non-concat dimension becomes the maximum over all inputs.
    // Returns false: the output never shares memory with the inputs.
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        outputs.resize(1, inputs[0]);
        int cAxis = clamp(axis, inputs[0]);

        int axisSum = 0;
        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape curShape = inputs[i];

            // All inputs must have the same rank. Without this check the
            // padding branch below could index curShape out of bounds.
            CV_Assert(curShape.size() == outputs[0].size());
            if (padding)
            {
                for (int curAxis = 0; curAxis < (int)outputs[0].size(); curAxis++)
                {
                    outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
                }
            }
            else
            {
                for (int curAxis = 0; curAxis < (int)outputs[0].size(); curAxis++)
                {
                    if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
                        CV_Error(Error::StsBadSize, "Inconsistent shape for ConcatLayer");
                }
            }

            axisSum += curShape[cAxis];
        }
        outputs[0][cAxis] = axisSum;
        return false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // Parenthesized explicitly: && binds tighter than ||, so behavior is
        // unchanged, but the grouping is now obvious (and -Wparentheses clean).
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) ||  // By channels
               (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !padding) ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
    }

    // Parallel concatenation along the channel axis (cAxis == 1) for 4D,
    // continuous blobs. Builds a flat table of per-(batch, channel) source
    // plane pointers laid out in output order, then copies plane data to the
    // output in parallel stripes.
    class ChannelConcatInvoker : public ParallelLoopBody
    {
    public:
        std::vector<Mat>* inputs;   // borrowed; must outlive the invocation
        Mat* output;                // borrowed destination blob
        int nstripes;               // parallel work granularity
        // chptrs[b*nchannels + c] points at the source plane destined for
        // batch b, output channel c.
        std::vector<const float*> chptrs;

        static void run(std::vector<Mat>& inputs, Mat& output, int nstripes)
        {
            ChannelConcatInvoker cc;
            cc.inputs = &inputs;
            cc.output = &output;
            cc.nstripes = nstripes;

            size_t i, ninputs = inputs.size();
            int nchannels = 0, batchsz = output.size[0];
            for( i = 0; i < ninputs; i++ )
            {
                Mat& inp = inputs[i];
                // All inputs must be continuous 4D blobs matching the output in
                // batch and spatial dims; only the channel count may differ.
                // NOTE(review): CV_16S is accepted here, but the pointer and
                // memcpy arithmetic below uses float element size — presumably
                // this CPU path only ever sees CV_32F; confirm before relying
                // on the 16S case.
                CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S) &&
                           inp.dims == 4 && inp.size[0] == output.size[0] &&
                           inp.size[2] == output.size[2] &&
                           inp.size[3] == output.size[3] );
                nchannels += inp.size[1];
            }
            CV_Assert( nchannels == output.size[1] );
            CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S) );

            cc.chptrs.resize(nchannels*batchsz);

            int ofs = 0;
            for( i = 0; i < ninputs; i++)
            {
                Mat& inp = inputs[i];
                for( int j = 0; j < batchsz; j++ )
                    for( int k = 0; k < inp.size[1]; k++ )
                    {
                        const float* ptr = inp.ptr<float>(j, k);
                        cc.chptrs[ofs + j*nchannels + k] = ptr;
                    }
                ofs += inp.size[1];
            }

            parallel_for_(Range(0, nstripes), cc, nstripes);
        }

        ChannelConcatInvoker() : inputs(0), output(0), nstripes(0) {}

        void operator()(const Range& r) const CV_OVERRIDE
        {
            // Flattened element space: (channel-plane index) x (plane offset).
            size_t planeSize = (size_t)output->size[2]*output->size[3];
            size_t nch = chptrs.size();
            size_t total = nch*planeSize;
            size_t stripeSize = (total + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = std::min(total, r.end*stripeSize);
            const float** ptrs = (const float**)&chptrs[0];
            float* outptr = output->ptr<float>();
            size_t blockSize0 = 1 << 16;  // copy granularity, in elements

            for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
            {
                size_t ch = ofs0/planeSize;
                size_t ofs = ofs0 - ch*planeSize;
                size_t blockSize = std::min(blockSize0, planeSize - ofs);
                // Clamp to this stripe's end: without it, the final block of a
                // stripe could spill into the next stripe's range, making two
                // threads write the same output bytes concurrently (a data race).
                blockSize = std::min(blockSize, stripeEnd - ofs0);
                memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
                ofs0 += blockSize;
            }
        }
    };

#ifdef HAVE_OPENCL
    // OpenCL forward pass. Dispatches one concat kernel per input, each
    // scattering that input into its slice of the output along cAxis.
    // Returns false (fall back to CPU) if padding is requested or a kernel
    // fails to build/run.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inps.depth() == CV_16S);  // fp16 stored as 16S
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        int cAxis = clamp(axis, inputs[0].dims);
        if (padding)
            return false;

        int bottom_concat_axis;
        int concat_size = total(shape(inputs[0]), cAxis + 1);       // elems after concat axis
        int top_concat_axis = outputs[0].size[cAxis];               // output size along axis
        int num_concats = total(shape(inputs[0]), 0, cAxis);        // elems before concat axis
        int offset_concat_axis = 0;                                 // running write offset
        UMat& outMat = outputs[0];
        String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float");
        String kname = format("concat_%s", use_half ? "half" : "float");

        for (size_t i = 0; i < inputs.size(); i++)
        {
            ocl::Kernel kernel(kname.c_str(), ocl::dnn::concat_oclsrc, buildopt);
            if (kernel.empty())
                return false;

            UMat& inpMat = inputs[i];
            bottom_concat_axis = inputs[i].size[cAxis];
            size_t nthreads = inputs[i].total();

            kernel.set(0, (int)nthreads);
            kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
            kernel.set(2, (int)num_concats);
            kernel.set(3, (int)concat_size);
            kernel.set(4, (int)top_concat_axis);
            kernel.set(5, (int)bottom_concat_axis);
            kernel.set(6, (int)offset_concat_axis);
            kernel.set(7, ocl::KernelArg::PtrWriteOnly(outMat));

            if (!kernel.run(1, &nthreads, NULL, false))
                return false;

            offset_concat_axis += bottom_concat_axis;
        }

        return true;
    }
#endif

    // CPU (and OpenCL-dispatching) forward pass. Fast path: channel-axis
    // concat of 4D blobs runs the parallel ChannelConcatInvoker; otherwise
    // each input is copied into the matching ROI of the output. With padding,
    // the output is zeroed first and each input is centered in the non-concat
    // dimensions.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        int cAxis = clamp(axis, inputs[0].dims);
        Mat& outMat = outputs[0];

        if (padding)
            outMat.setTo(0);  // regions not covered by any input stay zero

        if( cAxis == 1 && outMat.dims == 4 && !padding)
        {
            int nstripes = getNumThreads();
            ChannelConcatInvoker::run(inputs, outMat, nstripes);
        }
        else
        {
            std::vector<Range> ranges(outputs[0].dims, Range::all());

            ranges[cAxis].start = 0;
            for (size_t i = 0; i < inputs.size(); i++)
            {
                ranges[cAxis].end = ranges[cAxis].start + inputs[i].size[cAxis];
                // Center each input within the (possibly larger, padded)
                // output extent on every non-concat dimension. Without
                // padding the sizes match and start is simply 0.
                for (int j = 0; j < outMat.dims; ++j)
                {
                    if (j == cAxis) continue;
                    ranges[j].start = (outMat.size[j] - inputs[i].size[j]) / 2;
                    ranges[j].end = ranges[j].start + inputs[i].size[j];
                }
                inputs[i].copyTo(outMat(&ranges[0]));
                ranges[cAxis].start = ranges[cAxis].end;
            }
        }
    }

    // Builds the Vulkan backend node (a vkcom concat op on the clamped axis).
    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        vkcom::Tensor in = VkComTensor(input[0]);
        int cAxis = clamp(axis, in.dimNum());
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpConcat(cAxis));
        return Ptr<BackendNode>(new VkComBackendNode(input, op));
#endif // HAVE_VULKAN
        return Ptr<BackendNode>();
    }

    // Builds the Halide backend node: a chain of select()s routing each
    // output channel range to the corresponding input buffer. Only valid for
    // channel-axis concat (enforced by supportBackend), which needs >= 2 inputs.
    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        int offset = inputBuffers[0].channels();
        Halide::Expr topExpr = select(c < offset,
                                      inputBuffers[0](x, y, c, n),
                                      inputBuffers[1](x, y, c - offset, n));
        for (size_t i = 2; i < input.size(); ++i)  // size_t: avoid signed/unsigned compare
        {
            offset += inputBuffers[i - 1].channels();
            topExpr = select(c < offset, topExpr,
                             inputBuffers[i](x, y, c - offset, n));
        }
        top(x, y, c, n) = topExpr;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

    // Builds the Inference Engine (OpenVINO) Concat layer on the clamped axis.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
        InferenceEngine::LayerParams lp;
        lp.name = name;
        lp.type = "Concat";
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::ConcatLayer> ieLayer(new InferenceEngine::ConcatLayer(lp));
        ieLayer->_axis = clamp(axis, input->dims.size());
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }
};
328
329
// Factory: constructs the concrete concat implementation from layer params.
Ptr<ConcatLayer> ConcatLayer::create(const LayerParams& params)
{
    Ptr<ConcatLayer> layer(new ConcatLayerImpl(params));
    return layer;
}
333
334
}
335
}
336
337