// OpenCV DNN module — modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef _OPENCV_LIBDNN_HPP_
#define _OPENCV_LIBDNN_HPP_
#include <iomanip>
#include <map>
#include <memory>
#include <sstream>  // std::stringstream (OCL4DNNConvSpatial::options_) — was relied on transitively
#include <string>
#include <vector>
#include "common.hpp"

namespace cv { namespace dnn { namespace ocl4dnn {

struct OCL4DNNConvConfig
55
{
56
OCL4DNNConvConfig() :
57
kernel(1, 1),
58
pad(0, 0),
59
stride(1, 1),
60
dilation(1, 1),
61
group(1),
62
bias_term(false),
63
use_half(false)
64
{}
65
MatShape in_shape;
66
MatShape out_shape;
67
Size kernel;
68
Size pad;
69
Size stride;
70
Size dilation;
71
int group; // = 1;
72
bool bias_term; // = false;
73
bool use_half; // = false;
74
};
75
76
// Activation function that may be fused into the generated convolution
// kernel (selected via the setActiv* methods of OCL4DNNConvSpatial).
typedef enum {
    OCL4DNN_CONV_FUSED_ACTIV_NONE  = 0,
    OCL4DNN_CONV_FUSED_ACTIV_RELU  = 1,
    OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
    OCL4DNN_CONV_FUSED_ACTIV_TANH  = 4,
    OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
} ocl4dnnFusedActiv_t;
template<typename Dtype>
86
class OCL4DNNConvSpatial
87
{
88
public:
89
explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config);
90
~OCL4DNNConvSpatial();
91
bool Forward(const UMat& bottom_data,
92
const UMat& bottom_data2,
93
const UMat& weight,
94
const UMat& bias,
95
UMat& top_data, int32_t batch_size);
96
void setActivReLU(bool fuse_activ, float slope);
97
void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
98
void setActivPower(bool fuse_activ, float power);
99
void setActivTanh(bool fuse_activ);
100
void setActivReLU6(bool fuse_activ, float min, float max);
101
void setBias(bool bias_term);
102
103
private:
104
struct kernelConfig
105
{
106
std::string kernelName;
107
float executionTime;
108
size_t local_work_size[3];
109
size_t global_work_size[3];
110
int32_t workItem_output[3];
111
bool verified;
112
bool tested;
113
bool swizzle_weights;
114
bool use_null_local;
115
int32_t kernelType;
116
117
kernelConfig()
118
{}
119
120
kernelConfig(const std::string& name, const size_t* global_size, const size_t* local_size,
121
const int32_t* workItem,
122
bool swizzle,
123
int32_t type = 0)
124
: executionTime(0)
125
{
126
kernelName = name;
127
for (int32_t x = 0; x < 3; x++)
128
{
129
local_work_size[x] = local_size ? local_size[x] : 1;
130
global_work_size[x] = global_size[x];
131
workItem_output[x] = workItem[x];
132
}
133
swizzle_weights = swizzle;
134
use_null_local = local_size == NULL;
135
verified = false;
136
tested = false;
137
kernelType = type;
138
}
139
};
140
141
struct tunerParam
142
{
143
int kernelType;
144
int blockWidth;
145
int blockHeight;
146
int blockDepth;
147
148
tunerParam(int type, int w, int h, int d)
149
{
150
kernelType = type;
151
blockWidth = w;
152
blockHeight= h;
153
blockDepth = d;
154
}
155
};
156
157
inline void addDef(const char* name)
158
{
159
options_ << " -D " << name;
160
}
161
162
inline void addDef(const char* name, const int value)
163
{
164
options_ << " -D " << name << "=" << value;
165
}
166
167
inline void addDef(const char* name, const float value)
168
{
169
options_ << " -D " << name << "=(float)" << value;
170
}
171
172
inline void addDef(const char* name, const double value)
173
{
174
options_ << " -D " << name << "=(double)" << value;
175
}
176
177
inline void addDef(const char* name, const char* value)
178
{
179
options_ << " -D " << name << "=" << value;
180
}
181
182
void useFirstAvailable(const UMat &bottom,
183
UMat &top,
184
const UMat &weight,
185
const UMat &bias,
186
int32_t numImages,
187
UMat &verifyTop);
188
void setupKernel();
189
void collectCommonInformation();
190
void setupKernelDetails(int32_t kernelType,
191
int32_t blockM,
192
int32_t blockK,
193
int32_t blockN);
194
195
ocl::Program compileKernel();
196
typedef std::map<std::string, ocl::Program> phash_t;
197
phash_t phash;
198
void calculateBenchmark(const UMat &bottom, UMat &verifyTop,
199
const UMat &weight, const UMat &bias,
200
int32_t numImages);
201
202
203
void setupConvolution(const UMat &bottom,
204
UMat &top,
205
const UMat &weight,
206
const UMat &bias,
207
int32_t numImags,
208
UMat &verifyTop);
209
bool createConvolutionKernel(int32_t kernelType,
210
int32_t blockWidth,
211
int32_t blockHeight,
212
int32_t blockDepth);
213
bool createIDLFKernel(int32_t blockWidth,
214
int32_t blockHeight,
215
int32_t blockDepth);
216
bool createBasicKernel(int32_t blockWidth,
217
int32_t blockHeight,
218
int32_t blockDepth);
219
bool createGEMMLikeConvKernel(int32_t blockWidth,
220
int32_t blockHeight,
221
int32_t blockDepth);
222
bool createDWConvKernel(int32_t blockWidth,
223
int32_t blockHeight,
224
int32_t blockDepth);
225
void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
226
int32_t offset, int32_t size, bool write_only);
227
bool convolve(const UMat &bottom, UMat &top,
228
const UMat &weight, const UMat &bias,
229
int32_t numImages,
230
kernelConfig* config);
231
float timedConvolve(const UMat &bottom, UMat &top,
232
const UMat &weight, const UMat &bias,
233
int32_t numImages, kernelConfig* config);
234
235
bool verifyResult(const UMat &bottom,
236
UMat &top,
237
const UMat &weight,
238
const UMat &bias,
239
int32_t numImages,
240
kernelConfig* config,
241
UMat &verifyTop);
242
243
bool swizzleWeight(const UMat &weight,
244
int32_t swizzled_factor,
245
bool interleave = false);
246
247
void generateKey();
248
std::string generateSpecificKey(int32_t type, int32_t blockWidth,
249
int32_t blockHeight,
250
int32_t blockDepth);
251
void cacheTunedConfig();
252
bool loadTunedConfig();
253
254
void saveTunedConfig();
255
bool loadCachedConfig();
256
257
void unloadProgram(const std::string& kernelName);
258
void prepareKernel(const UMat &bottom, UMat &top,
259
const UMat &weight, const UMat &bias,
260
int32_t numImages);
261
bool setupKernelByConfig(int x, int y, int z, int type,
262
int lx, int ly, int lz,
263
bool swizzle, bool nullLocal);
264
void generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems);
265
void generate_dwconv_tuneritems(std::vector< cv::Ptr<tunerParam> > &tunerItems,
266
int blockM, int blockK, int blockN);
267
void generate_gemmlike_tuneritems(std::vector< cv::Ptr<tunerParam> > &tunerItems,
268
int blockM, int blockK, int blockN);
269
void generate_idlf_tuneritems(std::vector< cv::Ptr<tunerParam> > &tunerItems,
270
int blockM, int blockK, int simd_size);
271
void setFusionDefine(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise);
272
void setFusionArg(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise, ocl::Kernel &kernel, cl_uint &argIdx);
273
274
int32_t group_;
275
bool bias_term_;
276
UMat swizzled_weights_umat;
277
UMat weights_half;
278
UMat bias_half;
279
UMat bottom_data2_;
280
281
int32_t bottom_index_;
282
int32_t output_h_;
283
int32_t output_w_;
284
int32_t kernel_h_;
285
int32_t kernel_w_;
286
int32_t height_;
287
int32_t width_;
288
int32_t pad_h_;
289
int32_t pad_w_;
290
int32_t pad_bottom_;
291
int32_t pad_right_;
292
int32_t stride_h_;
293
int32_t stride_w_;
294
int32_t dilation_h_;
295
int32_t dilation_w_;
296
297
/// M_ is the channel dimension of the output for a single group, which is the
298
/// leading dimension of the filter matrix.
299
int32_t M_;
300
301
bool tuned_;
302
bool dwconv_;
303
304
std::string key_, key_sanitized_;
305
std::string short_key_;
306
std::string kernel_name_;
307
std::string cache_path_;
308
bool use_cache_path_; // true if cache_path_ directory exists
309
bool run_auto_tuning_;
310
bool force_auto_tuning_;
311
int32_t kernel_index_;
312
std::vector< cv::Ptr<kernelConfig> > kernelQueue;
313
cv::Ptr<kernelConfig> bestKernelConfig;
314
315
int32_t bottom_dim_;
316
int32_t top_dim_;
317
int32_t num_;
318
int32_t channels_;
319
int32_t num_output_;
320
321
int32_t kernelType_;
322
int32_t blockM_;
323
int32_t blockK_;
324
int32_t blockN_;
325
std::stringstream options_;
326
cv::ocl::ProgramSource src_;
327
int32_t prev_kernel_type_;
328
float negative_slope_;
329
float min_value_;
330
float max_value_;
331
UMat negative_slope_umat_;
332
ocl4dnnFusedActiv_t fused_activ_;
333
float power_;
334
bool fused_eltwise_;
335
bool use_half_;
336
};
337
338
// Pooling operation performed by OCL4DNNPool.
typedef enum {
    LIBDNN_POOLING_METHOD_MAX = 0,  // max pooling
    LIBDNN_POOLING_METHOD_AVE = 1,  // average pooling
    LIBDNN_POOLING_METHOD_STO = 2   // stochastic pooling
} ocl4dnnPoolingMethod_t;
struct OCL4DNNPoolConfig
345
{
346
OCL4DNNPoolConfig() :
347
kernel(1, 1),
348
pad_l(0), pad_t(0), pad_r(0), pad_b(0),
349
stride(1, 1),
350
dilation(1, 1),
351
channels(0),
352
pool_method(LIBDNN_POOLING_METHOD_MAX),
353
global_pooling(false),
354
avePoolPaddedArea(true),
355
computeMaxIdx(true),
356
use_half(false)
357
{}
358
MatShape in_shape;
359
MatShape out_shape;
360
Size kernel;
361
int pad_l, pad_t, pad_r, pad_b;
362
Size stride;
363
Size dilation;
364
365
int channels;
366
ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX;
367
bool global_pooling; // = false;
368
bool avePoolPaddedArea;
369
bool computeMaxIdx;
370
bool use_half;
371
};
372
373
template<typename Dtype>
374
class OCL4DNNPool
375
{
376
public:
377
explicit OCL4DNNPool(OCL4DNNPoolConfig config);
378
~OCL4DNNPool();
379
bool Forward(const UMat& bottom_data,
380
UMat& top_data,
381
UMat& top_mask);
382
private:
383
// Pooling parameters
384
std::vector<int32_t> stride_;
385
std::vector<int32_t> kernel_shape_;
386
std::vector<int32_t> im_in_shape_;
387
std::vector<int32_t> im_out_shape_;
388
389
ocl4dnnPoolingMethod_t pool_method_;
390
int32_t count_;
391
int32_t channels_;
392
int32_t kernel_h_;
393
int32_t kernel_w_;
394
int32_t stride_h_;
395
int32_t stride_w_;
396
int32_t pad_t_;
397
int32_t pad_l_;
398
int32_t pad_b_;
399
int32_t pad_r_;
400
int32_t height_;
401
int32_t width_;
402
int32_t pooled_height_;
403
int32_t pooled_width_;
404
bool avePoolPaddedArea;
405
bool computeMaxIdx;
406
bool use_half;
407
};
408
409
// Parameter bundle used to construct an OCL4DNNInnerProduct instance.
// M and K are the GEMM dimensions of the fully-connected computation.
struct OCL4DNNInnerProductConfig
{
    OCL4DNNInnerProductConfig() :
        num_output(0), M(0), K(0),
        bias_term(false), transpose(false), phase_test(true), use_half(false)
    {}
    int num_output;
    int M;
    int K;
    bool bias_term;
    bool transpose; // = false;
    bool phase_test; // = true;
    bool use_half; // = false;
};
template<typename Dtype>
425
class OCL4DNNInnerProduct
426
{
427
public:
428
explicit OCL4DNNInnerProduct(OCL4DNNInnerProductConfig config);
429
~OCL4DNNInnerProduct();
430
bool Forward(const UMat& bottom_data,
431
const UMat& weight,
432
const UMat& bias,
433
UMat& top_data);
434
private:
435
OCL4DNNInnerProductConfig config_;
436
int32_t axis_;
437
int32_t num_output_;
438
int32_t M_;
439
int32_t N_;
440
int32_t K_;
441
bool bias_term_;
442
bool transpose_;
443
bool image_copied_;
444
bool phase_test_;
445
bool use_half_;
446
};
447
448
// Normalization region for LRN (mirrors Caffe's LRNParameter.NormRegion).
typedef enum {
    LRNParameter_NormRegion_ACROSS_CHANNELS = 0,
    LRNParameter_NormRegion_WITHIN_CHANNEL = 1
} LRNParameter_NormRegion_WITHIN_CHANNEL_t;
struct OCL4DNNLRNConfig
454
{
455
OCL4DNNLRNConfig() :
456
lrn_type(LRNParameter_NormRegion_ACROSS_CHANNELS),
457
phase_test(true),
458
local_size(0), alpha(0.f), beta(0.f), k(0.f), norm_by_size(false),
459
batch_size(0), channels(0), height(0), width(0), use_half(false)
460
{}
461
MatShape in_shape;
462
LRNParameter_NormRegion_WITHIN_CHANNEL_t lrn_type;
463
bool phase_test; // = true;
464
int local_size;
465
float alpha;
466
float beta;
467
float k;
468
bool norm_by_size;
469
int32_t batch_size;
470
int32_t channels;
471
int32_t height;
472
int32_t width;
473
bool use_half;
474
};
475
476
template<typename Dtype>
477
class OCL4DNNLRN
478
{
479
public:
480
explicit OCL4DNNLRN(OCL4DNNLRNConfig config);
481
bool Forward(const UMat& bottom_data, UMat& top_data);
482
483
private:
484
bool crossChannelForward(const UMat& bottom_data, UMat& top_data);
485
LRNParameter_NormRegion_WITHIN_CHANNEL_t lrn_type_;
486
bool phase_test_;
487
int32_t size_;
488
Dtype alpha_;
489
Dtype beta_;
490
Dtype k_;
491
int32_t num_;
492
int32_t channels_;
493
int32_t height_;
494
int32_t width_;
495
bool norm_by_size_;
496
bool use_half_;
497
};
498
499
struct OCL4DNNSoftmaxConfig
500
{
501
OCL4DNNSoftmaxConfig() : axis(0), channels(0), logsoftmax(false), use_half(false)
502
{}
503
MatShape in_shape;
504
int axis;
505
int channels;
506
bool logsoftmax;
507
bool use_half;
508
};
509
510
template<typename Dtype>
511
class OCL4DNNSoftmax
512
{
513
public:
514
explicit OCL4DNNSoftmax(OCL4DNNSoftmaxConfig config);
515
~OCL4DNNSoftmax();
516
bool Forward(const UMat& bottom_data, UMat& top_data);
517
518
private:
519
int32_t softmax_axis_;
520
int32_t inner_num_;
521
int32_t outer_num_;
522
int32_t channels_;
523
int32_t count_;
524
bool use_slm_;
525
bool log_softmax_;
526
UMat scale_data_;
527
bool use_half_;
528
};
529
530
}}} // namespace cv::dnn::ocl4dnn

#endif