CoCalc -- softmax

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/layers/softmax_layer.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                           License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Copyright (C) 2017, Intel Corporation, all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42

43
#include "../precomp.hpp"
44
#include "layers_common.hpp"
45
#include "../op_halide.hpp"
46
#include "../op_inf_engine.hpp"
47
#include "../op_vkcom.hpp"
48
#include <algorithm>
49
#include <stdlib.h>
50
using std::max;
51

52
#ifdef HAVE_OPENCL
53
#include "opencl_kernels_dnn.hpp"
54
using namespace cv::dnn::ocl4dnn;
55
#endif
56

57
namespace cv
58
{
59
namespace dnn
60
{
61

62
class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
63
{
64
public:
65

66
    SoftMaxLayerImpl(const LayerParams& params)
67
    {
68
        axisRaw = params.get<int>("axis", 1);
69
        logSoftMax = params.get<bool>("log_softmax", false);
70
        setParamsFrom(params);
71
    }
72

73
#ifdef HAVE_OPENCL
74
    Ptr<OCL4DNNSoftmax<float> > softmaxOp;
75
#endif
76

77
    /**
     * Delegates output-shape computation to Layer::getMemoryShapes and requests
     * exactly one internal buffer: the shape of the first input with the
     * (clamped) softmax axis collapsed to 1.
     *
     * @return the value produced by Layer::getMemoryShapes, unchanged.
     */
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        const bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);

        // Scratch buffer holds one value per (outer, inner) position.
        MatShape bufShape = inputs[0];
        const int cAxis = clamp(axisRaw, bufShape.size());
        bufShape[cAxis] = 1;

        internals.clear();
        internals.push_back(bufShape);
        return inplace;
    }
89

90
    /**
     * Reports whether this layer can run on the given backend.
     *
     * - OpenCV: always.
     * - Halide: only when Halide is available, the axis is 1, and plain
     *   (non-log) softmax is requested. initHalide() never reads logSoftMax
     *   and always builds a plain softmax pipeline, so advertising Halide for
     *   log-softmax would silently produce wrong results.
     * - Inference Engine: only when available and plain softmax is requested.
     * - Vulkan (VkCom): whenever Vulkan is available.
     */
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1 && !logSoftMax) ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !logSoftMax) ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan());
    }
97

98
#ifdef HAVE_OPENCL
99
    /**
     * Discards the cached OpenCL softmax primitive; forward_ocl() recreates it
     * on its next call when it finds softmaxOp empty.
     */
    virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        CV_UNUSED(inputs);
        CV_UNUSED(outputs);
        softmaxOp.release();
    }
103

104
    /**
     * OpenCL forward pass.
     *
     * First tries the cached OCL4DNNSoftmax primitive (created lazily from the
     * first input's shape). If that reports failure, falls back to four
     * generic kernels run in sequence: channel max, subtract, channel sum,
     * channel divide, using internals[0] as the per-position scratch buffer.
     *
     * @return true when the output was produced; false if any kernel could
     *         not be created or run.
     */
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        const bool use_half = (inputs_.depth() == CV_16S);

        std::vector<UMat> inputs, outputs, internals;
        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        UMat& src = inputs[0];
        UMat& dstMat = outputs[0];
        const int axis = clamp(axisRaw, src.dims);

        if (softmaxOp.empty())
        {
            OCL4DNNSoftmaxConfig config;
            config.in_shape = shape(src);
            config.axis = axis;
            config.channels = src.size[axis];
            config.logsoftmax = logSoftMax;
            config.use_half = use_half;

            softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
        }

        // Fast path: the dedicated primitive handled everything.
        if (softmaxOp->Forward(src, dstMat))
            return true;

        // Generic path: decompose softmax into four elementary kernels.
        UMat& bufMat = internals[0];
        const MatShape srcShape = shape(src);
        const int outerCount = static_cast<int>(total(srcShape, 0, axis));
        const int channelCount = static_cast<int>(src.size[axis]);
        const int innerCount = static_cast<int>(total(srcShape, axis + 1));

        String buildOpts = format("-DT=%s", use_half ? "half" : "float");
        ocl::Kernel kmax, ksub, ksum, kdiv;

        // Short-circuit keeps the original creation order and stops at the first failure.
        const bool baseKernelsReady =
            kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts) &&
            ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts) &&
            ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts);
        if (!baseKernelsReady)
            return false;

        // Only the divide kernel is built with the log-softmax define.
        if (logSoftMax)
            buildOpts += " -DLOG_SOFTMAX ";
        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        size_t internal_globalSize[1] = { internals[0].total() };
        size_t total_globalSize[1] = { src.total() };
        const int totalCount = static_cast<int>(total_globalSize[0]);

        kmax.args(outerCount, channelCount, innerCount,
                  ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));
        if (!kmax.run(1, internal_globalSize, NULL, false))
            return false;

        ksub.args(totalCount, outerCount, channelCount, innerCount,
                  ocl::KernelArg::PtrReadOnly(bufMat),
                  ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));
        if (!ksub.run(1, total_globalSize, NULL, false))
            return false;

        ksum.args(outerCount, channelCount, innerCount,
                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
        if (!ksum.run(1, internal_globalSize, NULL, false))
            return false;

        kdiv.args(totalCount, outerCount, channelCount, innerCount,
                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
        return kdiv.run(1, total_globalSize, NULL, false);
    }
185
#endif
186

187
    /**
     * CPU forward pass: softmax (or log-softmax) of inputs[0] along the
     * clamped axis, written to outputs[0].
     *
     * The OpenCL path is attempted first when an OpenCL target is selected;
     * CV_16S inputs are routed to forward_fallback(). Otherwise the input must
     * be continuous CV_32F and the output must be continuous.
     *
     * The tensor is viewed as [outerSize, channels, innerSize]; internals[0]
     * holds one scratch value per (outer, inner) position. Steps:
     *   1. scratch = max over channels
     *   2. dst = src - scratch        (exp() arguments are then <= 0)
     *   3. dst = exp(dst)
     *   4. scratch = sum over channels of dst;  dst /= scratch
     *   5. if logSoftMax: dst = log(dst)
     *
     * All element offsets are computed in size_t so that tensors with more
     * than INT_MAX elements are indexed correctly.
     */
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        const Mat &src = inputs[0];
        Mat &dst = outputs[0];

        int axis = clamp(axisRaw, src.dims);
        size_t outerSize = src.total(0, axis), channels = src.size[axis],
                innerSize = src.total(axis + 1);

        CV_Assert(src.type() == CV_32F);
        CV_Assert(src.isContinuous() && dst.isContinuous());

        const float *srcPtr = src.ptr<float>();
        float *dstPtr = dst.ptr<float>();
        float *bufPtr = internals[0].ptr<float>();

        // Element strides: one outer slice, and one channel within a slice.
        size_t outerStep = src.total(axis);
        size_t cnStep = src.total(axis + 1);

        //compute max along axis
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            // Seed the running max with channel 0, then fold in the rest.
            memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));

            for (size_t cnDim = 1; cnDim < channels; cnDim++)
            {
                for (size_t i = 0; i < innerSize; i++)
                    bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
            }
        }

        //subtract max
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const size_t offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
            }
        }

        cv::exp(dst, dst);

        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            //sum exp along axis
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] = 0.f;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const size_t offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    bufPtr[bufOffset + i] += dstPtr[offset + i];
            }

            //divide by computed sum
            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const size_t offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    dstPtr[offset + i] /= bufPtr[bufOffset + i];
            }
            if (logSoftMax)
            {
                for (size_t cnDim = 0; cnDim < channels; cnDim++)
                {
                    const size_t offset = srcOffset + cnDim * cnStep;
                    for (size_t i = 0; i < innerSize; i++)
                        dstPtr[offset + i] = log(dstPtr[offset + i]);
                }
            }
        }
    }
288

289
    /**
     * Creates the Vulkan (VkCom) backend node for this layer.
     *
     * With HAVE_VULKAN, wraps a vkcom::OpSoftmax configured with the axis
     * clamped against the input tensor's dimension count and the logSoftMax
     * flag. Without it, returns an empty node.
     */
    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        vkcom::Tensor in = VkComTensor(inputs[0]);
        const int cAxis = clamp(axisRaw, in.dimNum());

        std::shared_ptr<vkcom::OpBase> vkOp(new vkcom::OpSoftmax(cAxis, logSoftMax));
        return Ptr<BackendNode>(new VkComBackendNode(inputs, vkOp));
#else
        CV_UNUSED(inputs);
        return Ptr<BackendNode>();
#endif  // HAVE_VULKAN
    }
299

300

301
    /**
     * Creates the Halide backend node for this layer.
     *
     * With HAVE_HALIDE, builds a pipeline that exponentiates the input and
     * divides by the sum over (x, y, c) for each n. Only 1x1 spatial inputs
     * are accepted; anything else raises StsNotImplemented.
     * Without HAVE_HALIDE, returns an empty node.
     *
     * NOTE: logSoftMax is not consulted here; the pipeline always computes
     * plain softmax, and no per-sample max is subtracted before exp().
     */
    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);

        int inW = 0, inH = 0, inC = 0, inN = 0;
        getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);

        const bool spatialIs1x1 = (inW == 1 && inH == 1);
        if (!spatialIs1x1)
        {
            CV_Error(cv::Error::StsNotImplemented,
                     "Halide backend for SoftMax with spatial size "
                     "more than 1x1 is not implemented");
        }

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));

        Halide::Func expFunc("expInput");
        Halide::RDom reduceDom(0, inW, 0, inH, 0, inC);
        expFunc(x, y, c, n) = exp(inputBuffer(x, y, c, n));

        // Normaliser: sum of exponentials over the whole (x, y, c) domain of sample n.
        Halide::Expr normaliser = sum(expFunc(reduceDom.x, reduceDom.y, reduceDom.z, n));
        top(x, y, c, n) = expFunc(x, y, c, n) / normaliser;

        return Ptr<BackendNode>(new HalideBackendNode(top));
#else
        CV_UNUSED(inputs);
        return Ptr<BackendNode>();
#endif  // HAVE_HALIDE
    }
325

326
    /**
     * Creates the Inference Engine backend node for this layer.
     *
     * With HAVE_INF_ENGINE, builds an FP32 InferenceEngine::SoftMaxLayer named
     * after this layer, with its axis clamped against the input's dimension
     * count. Without it, returns an empty node.
     */
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

        InferenceEngine::LayerParams ieParams;
        ieParams.name = name;
        ieParams.type = "SoftMax";
        ieParams.precision = InferenceEngine::Precision::FP32;

        std::shared_ptr<InferenceEngine::SoftMaxLayer> softmaxNode(new InferenceEngine::SoftMaxLayer(ieParams));
        softmaxNode->axis = clamp(axisRaw, input->dims.size());

        return Ptr<BackendNode>(new InfEngineBackendNode(softmaxNode));
#else
        CV_UNUSED(inputs);
        return Ptr<BackendNode>();
#endif  // HAVE_INF_ENGINE
    }
341

342
    /**
     * Estimates the operation count as 4 per input element, summed over all
     * inputs.
     *
     * The element count is widened to int64 before multiplying so the product
     * cannot overflow a narrower return type of total().
     *
     * @param inputs  shapes of the layer inputs.
     * @param outputs unused.
     * @return estimated number of floating-point operations.
     */
    int64 getFLOPS(const std::vector<MatShape> &inputs,
                  const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        int64 flops = 0;

        for (size_t i = 0; i < inputs.size(); i++)
        {
            flops += 4 * static_cast<int64>(total(inputs[i]));
        }

        return flops;
    }
355

356
    int axisRaw;
357
};
358

359
/** Factory: constructs the concrete softmax implementation from @p params. */
Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
{
    Ptr<SoftmaxLayer> layer(new SoftMaxLayerImpl(params));
    return layer;
}
363

364
}
365
}
366

367
Product

Resources

Company