CoCalc -- region

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/dnn/src/layers/region_layer.cpp
¹⁶³³⁷ views
1
/*M ///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                           License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14
// Copyright (C) 2017, Intel Corporation, all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42

43
#include "../precomp.hpp"
44
#include <opencv2/dnn/shape_utils.hpp>
45
#include <opencv2/dnn/all_layers.hpp>
46
#include "../nms.inl.hpp"
47

48
#ifdef HAVE_OPENCL
49
#include "opencl_kernels_dnn.hpp"
50
#endif
51

52
namespace cv
53
{
54
namespace dnn
55
{
56

57
class RegionLayerImpl CV_FINAL : public RegionLayer
58
{
59
public:
60
    int coords, classes, anchors, classfix;
61
    float thresh, nmsThreshold;
62
    bool useSoftmax, useLogistic;
63
#ifdef HAVE_OPENCL
64
    UMat blob_umat;
65
#endif
66

67
    RegionLayerImpl(const LayerParams& params)
68
    {
69
        setParamsFrom(params);
70
        CV_Assert(blobs.size() == 1);
71

72
        thresh = params.get<float>("thresh", 0.2);
73
        coords = params.get<int>("coords", 4);
74
        classes = params.get<int>("classes", 0);
75
        anchors = params.get<int>("anchors", 5);
76
        classfix = params.get<int>("classfix", 0);
77
        useSoftmax = params.get<bool>("softmax", false);
78
        useLogistic = params.get<bool>("logistic", false);
79
        nmsThreshold = params.get<float>("nms_threshold", 0.4);
80

81
        CV_Assert(nmsThreshold >= 0.);
82
        CV_Assert(coords == 4);
83
        CV_Assert(classes >= 1);
84
        CV_Assert(anchors >= 1);
85
        CV_Assert(useLogistic || useSoftmax);
86
        if (params.get<bool>("softmax_tree", false))
87
            CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
88
    }
89

90
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
91
                         const int requiredOutputs,
92
                         std::vector<MatShape> &outputs,
93
                         std::vector<MatShape> &internals) const CV_OVERRIDE
94
    {
95
        CV_Assert(inputs.size() > 0);
96
        // channels == cell_size*anchors
97
        CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
98
        int batch_size = inputs[0][0];
99
        if(batch_size > 1)
100
            outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
101
        else
102
            outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
103
        return false;
104
    }
105

106
    float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }
107

108
    void softmax_activate(const float* input, const int n, const float temp, float* output)
109
    {
110
        int i;
111
        float sum = 0;
112
        float largest = -FLT_MAX;
113
        for (i = 0; i < n; ++i) {
114
            if (input[i] > largest) largest = input[i];
115
        }
116
        for (i = 0; i < n; ++i) {
117
            float e = exp((input[i] - largest) / temp);
118
            sum += e;
119
            output[i] = e;
120
        }
121
        for (i = 0; i < n; ++i) {
122
            output[i] /= sum;
123
        }
124
    }
125

126
#ifdef HAVE_OPENCL
127
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
128
    {
129
        if (blob_umat.empty())
130
            blobs[0].copyTo(blob_umat);
131

132
        std::vector<UMat> inputs;
133
        std::vector<UMat> outputs;
134

135
        // TODO: implement a logistic activation to classification scores.
136
        if (useLogistic || inps.depth() == CV_16S)
137
            return false;
138

139
        inps.getUMatVector(inputs);
140
        outs.getUMatVector(outputs);
141

142
        CV_Assert(inputs.size() >= 1);
143
        int const cell_size = classes + coords + 1;
144

145
        for (size_t ii = 0; ii < outputs.size(); ii++)
146
        {
147
            UMat& inpBlob = inputs[ii];
148
            UMat& outBlob = outputs[ii];
149

150
            int batch_size = inpBlob.size[0];
151
            int rows = inpBlob.size[1];
152
            int cols = inpBlob.size[2];
153

154
            // channels == cell_size*anchors, see l. 94
155
            int sample_size = cell_size*rows*cols*anchors;
156

157
            ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
158
            size_t nanchors = rows*cols*anchors*batch_size;
159
            logistic_kernel.set(0, (int)nanchors);
160
            logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
161
            logistic_kernel.set(2, (int)cell_size);
162
            logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
163
            logistic_kernel.run(1, &nanchors, NULL, false);
164

165
            if (useSoftmax)
166
            {
167
                // Yolo v2
168
                // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
169
                ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
170
                size_t nanchors = rows*cols*anchors*batch_size;
171
                softmax_kernel.set(0, (int)nanchors);
172
                softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
173
                softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
174
                softmax_kernel.set(3, (int)cell_size);
175
                softmax_kernel.set(4, (int)classes);
176
                softmax_kernel.set(5, (int)classfix);
177
                softmax_kernel.set(6, (int)rows);
178
                softmax_kernel.set(7, (int)cols);
179
                softmax_kernel.set(8, (int)anchors);
180
                softmax_kernel.set(9, (float)thresh);
181
                softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
182
                if (!softmax_kernel.run(1, &nanchors, NULL, false))
183
                    return false;
184
            }
185

186
            if (nmsThreshold > 0) {
187
                Mat mat = outBlob.getMat(ACCESS_WRITE);
188
                float *dstData = mat.ptr<float>();
189
                for (int b = 0; b < batch_size; ++b)
190
                    do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
191
            }
192

193
        }
194

195
        return true;
196
    }
197
#endif
198

199
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
200
    {
201
        CV_TRACE_FUNCTION();
202
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
203

204
        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
205
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
206

207
        if (inputs_arr.depth() == CV_16S)
208
        {
209
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
210
            return;
211
        }
212

213
        std::vector<Mat> inputs, outputs, internals;
214
        inputs_arr.getMatVector(inputs);
215
        outputs_arr.getMatVector(outputs);
216
        internals_arr.getMatVector(internals);
217

218
        CV_Assert(inputs.size() >= 1);
219
        CV_Assert(outputs.size() == 1);
220
        int const cell_size = classes + coords + 1;
221

222
        const float* biasData = blobs[0].ptr<float>();
223

224
        for (size_t ii = 0; ii < outputs.size(); ii++)
225
        {
226
            Mat &inpBlob = inputs[ii];
227
            Mat &outBlob = outputs[ii];
228

229
            int batch_size = inpBlob.size[0];
230
            int rows = inpBlob.size[1];
231
            int cols = inpBlob.size[2];
232

233
            // address length for one image in batch, both for input and output
234
            int sample_size = cell_size*rows*cols*anchors;
235

236
            // assert that the comment above is true
237
            CV_Assert(sample_size*batch_size == inpBlob.total());
238
            CV_Assert(sample_size*batch_size == outBlob.total());
239

240
            CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
241
            int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
242
            int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;
243

244
            const float *srcData = inpBlob.ptr<float>();
245
            float *dstData = outBlob.ptr<float>();
246

247
            // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
248
            for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
249
                int index = cell_size*i;
250
                float x = srcData[index + 4];
251
                dstData[index + 4] = logistic_activate(x);	// logistic activation
252
            }
253

254
            if (useSoftmax) {  // Yolo v2
255
                for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
256
                    int index = cell_size*i;
257
                    softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
258
                }
259
            }
260
            else if (useLogistic) {  // Yolo v3
261
                for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
262
                    int index = cell_size*i;
263
                    const float* input = srcData + index + 5;
264
                    float* output = dstData + index + 5;
265
                    for (int c = 0; c < classes; ++c)
266
                        output[c] = logistic_activate(input[c]);
267
                }
268
            }
269
            for (int b = 0; b < batch_size; ++b)
270
                for (int x = 0; x < cols; ++x)
271
                    for(int y = 0; y < rows; ++y)
272
                        for (int a = 0; a < anchors; ++a) {
273
                            // relative start address for image b within the batch data
274
                            int index_sample_offset = sample_size*b;
275
                            int index = (y*cols + x)*anchors + a;  // index for each grid-cell & anchor
276
                            int p_index = index_sample_offset + index * cell_size + 4;
277
                            float scale = dstData[p_index];
278
                            if (classfix == -1 && scale < .5) scale = 0;  // if(t0 < 0.5) t0 = 0;
279
                            int box_index = index_sample_offset + index * cell_size;
280

281
                            dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
282
                            dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
283
                            dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / hNorm;
284
                            dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / wNorm;
285

286
                            int class_index = index_sample_offset + index * cell_size + 5;
287
                            for (int j = 0; j < classes; ++j) {
288
                                float prob = scale*dstData[class_index + j];  // prob = IoU(box, object) = t0 * class-probability
289
                                dstData[class_index + j] = (prob > thresh) ? prob : 0;  // if (IoU < threshold) IoU = 0;
290
                            }
291
                        }
292
            if (nmsThreshold > 0) {
293
                for (int b = 0; b < batch_size; ++b){
294
                    do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
295
                }
296
            }
297
        }
298
    }
299

300
    void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
301
    {
302
        std::vector<Rect2d> boxes(total);
303
        std::vector<float> scores(total);
304

305
        for (int i = 0; i < total; ++i)
306
        {
307
            Rect2d &b = boxes[i];
308
            int box_index = i * (classes + coords + 1);
309
            b.width = detections[box_index + 2];
310
            b.height = detections[box_index + 3];
311
            b.x = detections[box_index + 0] - b.width / 2;
312
            b.y = detections[box_index + 1] - b.height / 2;
313
        }
314

315
        std::vector<int> indices;
316
        for (int k = 0; k < classes; ++k)
317
        {
318
            for (int i = 0; i < total; ++i)
319
            {
320
                int box_index = i * (classes + coords + 1);
321
                int class_index = box_index + 5;
322
                scores[i] = detections[class_index + k];
323
                detections[class_index + k] = 0;
324
            }
325
            NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
326
            for (int i = 0, n = indices.size(); i < n; ++i)
327
            {
328
                int box_index = indices[i] * (classes + coords + 1);
329
                int class_index = box_index + 5;
330
                detections[class_index + k] = scores[indices[i]];
331
            }
332
        }
333
    }
334

335
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
336
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
337
    {
338
        CV_UNUSED(outputs); // suppress unused variable warning
339

340
        int64 flops = 0;
341
        for(int i = 0; i < inputs.size(); i++)
342
        {
343
            flops += 60*total(inputs[i]);
344
        }
345
        return flops;
346
    }
347
};
348

349
Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
350
{
351
    return Ptr<RegionLayer>(new RegionLayerImpl(params));
352
}
353

354
}  // namespace dnn
355
}  // namespace cv
356

357
Product

Resources

Company