// OpenCV machine-learning module: modules/ml/src/ann_mlp.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv { namespace ml {
struct AnnParams
46
{
47
AnnParams()
48
{
49
termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
50
trainMethod = ANN_MLP::RPROP;
51
bpDWScale = bpMomentScale = 0.1;
52
rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
53
rpDWMin = FLT_EPSILON; rpDWMax = 50.;
54
initialT=10;finalT=0.1,coolingRatio=0.95;itePerStep=10;
55
rEnergy = cv::RNG(12345);
56
}
57
58
TermCriteria termCrit;
59
int trainMethod;
60
61
double bpDWScale;
62
double bpMomentScale;
63
64
double rpDW0;
65
double rpDWPlus;
66
double rpDWMinus;
67
double rpDWMin;
68
double rpDWMax;
69
70
double initialT;
71
double finalT;
72
double coolingRatio;
73
int itePerStep;
74
RNG rEnergy;
75
};
76
77
// Clamp 'val' to the inclusive range [min_val, max_val].
template <typename T>
inline T inBounds(T val, T min_val, T max_val)
{
    const T lower_clamped = std::max(val, min_val);
    return std::min(lower_clamped, max_val);
}
class SimulatedAnnealingANN_MLP
84
{
85
protected:
86
ml::ANN_MLP& nn;
87
Ptr<ml::TrainData> data;
88
int nbVariables;
89
vector<double*> adrVariables;
90
RNG rVar;
91
RNG rIndex;
92
double varTmp;
93
int index;
94
public:
95
SimulatedAnnealingANN_MLP(ml::ANN_MLP& x, const Ptr<ml::TrainData>& d) : nn(x), data(d), varTmp(0.0), index(0)
96
{
97
initVarMap();
98
}
99
~SimulatedAnnealingANN_MLP() {}
100
101
void changeState()
102
{
103
index = rIndex.uniform(0, nbVariables);
104
double dv = rVar.uniform(-1.0, 1.0);
105
varTmp = *adrVariables[index];
106
*adrVariables[index] = dv;
107
}
108
109
void reverseState()
110
{
111
*adrVariables[index] = varTmp;
112
}
113
114
double energy() const { return nn.calcError(data, false, noArray()); }
115
116
protected:
117
void initVarMap()
118
{
119
Mat l = nn.getLayerSizes();
120
nbVariables = 0;
121
adrVariables.clear();
122
for (int i = 1; i < l.rows-1; i++)
123
{
124
Mat w = nn.getWeights(i);
125
for (int j = 0; j < w.rows; j++)
126
{
127
for (int k = 0; k < w.cols; k++, nbVariables++)
128
{
129
if (j == w.rows - 1)
130
{
131
adrVariables.push_back(&w.at<double>(w.rows - 1, k));
132
}
133
else
134
{
135
adrVariables.push_back(&w.at<double>(j, k));
136
}
137
}
138
}
139
}
140
}
141
142
};
143
144
class ANN_MLPImpl CV_FINAL : public ANN_MLP
{
public:
// Construct an untrained network with the historical defaults:
// symmetric sigmoid activation, empty topology, RPROP training.
ANN_MLPImpl()
{
    clear();
    setActivationFunction( SIGMOID_SYM, 0, 0);
    setLayerSizes(Mat());
    setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
}
virtual ~ANN_MLPImpl() CV_OVERRIDE {}

// Trivial inline accessors for the training hyper-parameters held in 'params'.
inline TermCriteria getTermCriteria() const CV_OVERRIDE { return params.termCrit; }
inline void setTermCriteria(TermCriteria val) CV_OVERRIDE { params.termCrit = val; }
inline double getBackpropWeightScale() const CV_OVERRIDE { return params.bpDWScale; }
inline void setBackpropWeightScale(double val) CV_OVERRIDE { params.bpDWScale = val; }
inline double getBackpropMomentumScale() const CV_OVERRIDE { return params.bpMomentScale; }
inline void setBackpropMomentumScale(double val) CV_OVERRIDE { params.bpMomentScale = val; }
inline double getRpropDW0() const CV_OVERRIDE { return params.rpDW0; }
inline void setRpropDW0(double val) CV_OVERRIDE { params.rpDW0 = val; }
inline double getRpropDWPlus() const CV_OVERRIDE { return params.rpDWPlus; }
inline void setRpropDWPlus(double val) CV_OVERRIDE { params.rpDWPlus = val; }
inline double getRpropDWMinus() const CV_OVERRIDE { return params.rpDWMinus; }
inline void setRpropDWMinus(double val) CV_OVERRIDE { params.rpDWMinus = val; }
inline double getRpropDWMin() const CV_OVERRIDE { return params.rpDWMin; }
inline void setRpropDWMin(double val) CV_OVERRIDE { params.rpDWMin = val; }
inline double getRpropDWMax() const CV_OVERRIDE { return params.rpDWMax; }
inline void setRpropDWMax(double val) CV_OVERRIDE { params.rpDWMax = val; }
inline double getAnnealInitialT() const CV_OVERRIDE { return params.initialT; }
inline void setAnnealInitialT(double val) CV_OVERRIDE { params.initialT = val; }
inline double getAnnealFinalT() const CV_OVERRIDE { return params.finalT; }
inline void setAnnealFinalT(double val) CV_OVERRIDE { params.finalT = val; }
inline double getAnnealCoolingRatio() const CV_OVERRIDE { return params.coolingRatio; }
inline void setAnnealCoolingRatio(double val) CV_OVERRIDE { params.coolingRatio = val; }
inline int getAnnealItePerStep() const CV_OVERRIDE { return params.itePerStep; }
inline void setAnnealItePerStep(int val) CV_OVERRIDE { params.itePerStep = val; }
// disabled getAnnealEnergyRNG()
inline void setAnnealEnergyRNG(const RNG& val) CV_OVERRIDE { params.rEnergy = val; }
// Reset scaling ranges, RNG state and weights; the network becomes untrained.
void clear() CV_OVERRIDE
{
    min_val = max_val = min_val1 = max_val1 = 0.;
    rng = RNG((uint64)-1);
    weights.clear();
    trained = false;
    max_buf_sz = 1 << 12;  // default prediction scratch-buffer budget (doubles)
}

// Number of layers, including the input and output layers.
int layer_count() const { return (int)layer_sizes.size(); }
// Select the training algorithm and initialize its two main parameters.
// An unknown method id silently falls back to RPROP. For ANNEAL the two
// parameters are ignored (annealing settings have dedicated setters).
void setTrainMethod(int method, double param1, double param2) CV_OVERRIDE
{
    const bool known_method = (method == ANN_MLP::RPROP ||
                               method == ANN_MLP::BACKPROP ||
                               method == ANN_MLP::ANNEAL);
    if (!known_method)
        method = ANN_MLP::RPROP;
    params.trainMethod = method;

    if (method == ANN_MLP::RPROP)
    {
        // param1 -> initial update-value, param2 -> lower bound
        params.rpDW0 = (param1 < FLT_EPSILON) ? 1. : param1;
        params.rpDWMin = std::max(param2, 0.);
    }
    else if (method == ANN_MLP::BACKPROP)
    {
        // param1 -> gradient scale (clamped to [1e-3, 1]),
        // param2 -> momentum scale (capped at 1)
        if (param1 <= 0)
            param1 = 0.1;
        params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
        params.bpMomentScale = std::min(param2 < 0 ? 0.1 : param2, 1.);
    }
}
// Returns the currently selected training algorithm id.
int getTrainMethod() const CV_OVERRIDE
{
    return params.trainMethod;
}
// Select the neuron activation function and its two shape parameters.
// Zero-valued parameters are replaced by per-function defaults; the
// min/max output-scaling targets used by calc_output_scale() are set here too.
void setActivationFunction(int _activ_func, double _f_param1, double _f_param2) CV_OVERRIDE
{
    if( _activ_func < 0 || _activ_func > LEAKYRELU)
        CV_Error( CV_StsOutOfRange, "Unknown activation function" );

    activ_func = _activ_func;

    switch( activ_func )
    {
    case SIGMOID_SYM:
        // outputs trained towards +/-0.95, validated against +/-0.98
        max_val = 0.95; min_val = -max_val;
        max_val1 = 0.98; min_val1 = -max_val1;
        if( fabs(_f_param1) < FLT_EPSILON )
            _f_param1 = 2./3;
        if( fabs(_f_param2) < FLT_EPSILON )
            _f_param2 = 1.7159;
        break;
    case GAUSSIAN:
        max_val = 1.; min_val = 0.05;
        max_val1 = 1.; min_val1 = 0.02;
        if (fabs(_f_param1) < FLT_EPSILON)
            _f_param1 = 1.;
        if (fabs(_f_param2) < FLT_EPSILON)
            _f_param2 = 1.;
        break;
    case RELU:
        if (fabs(_f_param1) < FLT_EPSILON)
            _f_param1 = 1;
        min_val = max_val = min_val1 = max_val1 = 0.;  // no output range restriction
        _f_param2 = 0.;
        break;
    case LEAKYRELU:
        if (fabs(_f_param1) < FLT_EPSILON)
            _f_param1 = 0.01;  // default negative-side slope
        min_val = max_val = min_val1 = max_val1 = 0.;
        _f_param2 = 0.;
        break;
    default:
        min_val = max_val = min_val1 = max_val1 = 0.;
        _f_param1 = 1.;
        _f_param2 = 0.;
    }

    f_param1 = _f_param1;
    f_param2 = _f_param2;
}
// Initialize all inter-layer weights using the Nguyen-Widrow algorithm:
// random weights in [-1, 1) are normalized per neuron, and the biases of
// non-output layers are spread so that each neuron's active region covers
// a distinct portion of the input space.
void init_weights()
{
    int i, j, k, l_count = layer_count();

    for( i = 1; i < l_count; i++ )
    {
        int n1 = layer_sizes[i-1];
        int n2 = layer_sizes[i];
        // G is the Nguyen-Widrow scale factor 0.7 * n1^(1/(n2-1))
        double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
        double* w = weights[i].ptr<double>();

        // initialize weights using Nguyen-Widrow algorithm
        for( j = 0; j < n2; j++ )
        {
            double s = 0;
            for( k = 0; k <= n1; k++ )  // k == n1 is the bias row
            {
                val = rng.uniform(0., 1.)*2-1.;
                w[k*n2 + j] = val;
                s += fabs(val);
            }

            if( i < l_count - 1 )
            {
                // normalize by the sum of |w| excluding the bias term
                // (val still holds the bias weight), then spread the bias
                // evenly across [-G, G)
                s = 1./(s - fabs(val));
                for( k = 0; k <= n1; k++ )
                    w[k*n2 + j] *= s;
                w[n1*n2 + j] *= G*(-1+j*2./n2);
            }
        }
    }
}
// Return the layer sizes as a Mat of ints (deep copy, so callers cannot
// mutate the stored topology).
Mat getLayerSizes() const CV_OVERRIDE
{
    Mat_<int> sizes(layer_sizes, true);  // copyData = true
    return sizes;
}
// Define the network topology. _layer_sizes holds the neuron count of each
// layer, input first and output last. Resets any previously trained state.
// Throws if a layer is empty or a hidden layer has fewer than 2 neurons.
void setLayerSizes( InputArray _layer_sizes ) CV_OVERRIDE
{
    clear();

    _layer_sizes.copyTo(layer_sizes);
    int l_count = layer_count();

    // weights[0] holds the input scaling, weights[1..l_count-1] the layer
    // weight matrices, weights[l_count] the output scaling and
    // weights[l_count+1] its inverse.
    weights.resize(l_count + 2);
    max_lsize = 0;

    if( l_count > 0 )
    {
        for( int i = 0; i < l_count; i++ )
        {
            int n = layer_sizes[i];
            // hidden layers (0 < i < l_count-1) need at least 2 neurons
            if( n < 1 + (0 < i && i < l_count-1))
                CV_Error( CV_StsOutOfRange,
                          "there should be at least one input and one output "
                          "and every hidden layer must have more than 1 neuron" );
            max_lsize = std::max( max_lsize, n );
            if( i > 0 )
                weights[i].create(layer_sizes[i-1]+1, n, CV_64F);  // +1 row for bias
        }

        int ninputs = layer_sizes.front();
        int noutputs = layer_sizes.back();
        weights[0].create(1, ninputs*2, CV_64F);           // (scale, shift) per input
        weights[l_count].create(1, noutputs*2, CV_64F);    // (scale, shift) per output
        weights[l_count+1].create(1, noutputs*2, CV_64F);  // inverse output scaling
    }
}
// Forward pass over all input rows. Writes results to _outputs when it is
// requested; when exactly one sample is given, returns the index of the
// maximum output element, otherwise 0. Samples are processed in chunks of
// dn0 rows so the intermediate buffers fit within max_buf_sz.
float predict( InputArray _inputs, OutputArray _outputs, int ) const CV_OVERRIDE
{
    if( !trained )
        CV_Error( CV_StsError, "The network has not been trained or loaded" );

    Mat inputs = _inputs.getMat();
    int type = inputs.type(), l_count = layer_count();
    int n = inputs.rows, dn0 = n;

    CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
    int noutputs = layer_sizes[l_count-1];
    Mat outputs;

    int min_buf_sz = 2*max_lsize;  // double buffer: two layers of max size
    int buf_sz = n*min_buf_sz;

    // shrink the chunk size until the buffer fits the budget
    if( buf_sz > max_buf_sz )
    {
        dn0 = max_buf_sz/min_buf_sz;
        dn0 = std::max( dn0, 1 );
        buf_sz = dn0*min_buf_sz;
    }

    cv::AutoBuffer<double> _buf(buf_sz+noutputs);
    double* buf = _buf.data();

    if( !_outputs.needed() )
    {
        // caller only wants the arg-max: use scratch space after the buffer
        CV_Assert( n == 1 );
        outputs = Mat(n, noutputs, type, buf + buf_sz);
    }
    else
    {
        _outputs.create(n, noutputs, type);
        outputs = _outputs.getMat();
    }

    int dn = 0;
    for( int i = 0; i < n; i += dn )
    {
        dn = std::min( dn0, n - i );

        Mat layer_in = inputs.rowRange(i, i + dn);
        Mat layer_out( dn, layer_in.cols, CV_64F, buf);

        scale_input( layer_in, layer_out );
        layer_in = layer_out;

        // forward pass: ping-pong between the two halves of 'buf'
        for( int j = 1; j < l_count; j++ )
        {
            double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
            int cols = layer_sizes[j];

            layer_out = Mat(dn, cols, CV_64F, data);
            Mat w = weights[j].rowRange(0, layer_in.cols);
            gemm(layer_in, w, 1, noArray(), 0, layer_out);
            calc_activ_func( layer_out, weights[j] );  // also adds the bias row

            layer_in = layer_out;
        }

        layer_out = outputs.rowRange(i, i + dn);
        scale_output( layer_in, layer_out );
    }

    // single sample: return the column index of the maximum output
    if( n == 1 )
    {
        int maxIdx[] = {0, 0};
        minMaxIdx(outputs, 0, 0, 0, maxIdx);
        return (float)(maxIdx[0] + maxIdx[1]);
    }

    return 0.f;
}
void scale_input( const Mat& _src, Mat& _dst ) const
417
{
418
int cols = _src.cols;
419
const double* w = weights[0].ptr<double>();
420
421
if( _src.type() == CV_32F )
422
{
423
for( int i = 0; i < _src.rows; i++ )
424
{
425
const float* src = _src.ptr<float>(i);
426
double* dst = _dst.ptr<double>(i);
427
for( int j = 0; j < cols; j++ )
428
dst[j] = src[j]*w[j*2] + w[j*2+1];
429
}
430
}
431
else
432
{
433
for( int i = 0; i < _src.rows; i++ )
434
{
435
const double* src = _src.ptr<double>(i);
436
double* dst = _dst.ptr<double>(i);
437
for( int j = 0; j < cols; j++ )
438
dst[j] = src[j]*w[j*2] + w[j*2+1];
439
}
440
}
441
}
442
443
void scale_output( const Mat& _src, Mat& _dst ) const
444
{
445
int cols = _src.cols;
446
const double* w = weights[layer_count()].ptr<double>();
447
448
if( _dst.type() == CV_32F )
449
{
450
for( int i = 0; i < _src.rows; i++ )
451
{
452
const double* src = _src.ptr<double>(i);
453
float* dst = _dst.ptr<float>(i);
454
for( int j = 0; j < cols; j++ )
455
dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
456
}
457
}
458
else
459
{
460
for( int i = 0; i < _src.rows; i++ )
461
{
462
const double* src = _src.ptr<double>(i);
463
double* dst = _dst.ptr<double>(i);
464
for( int j = 0; j < cols; j++ )
465
dst[j] = src[j]*w[j*2] + w[j*2+1];
466
}
467
}
468
}
469
470
// Apply the activation function in-place to 'sums' (the pre-activation
// values), first adding the bias row of 'w'. The sigmoid and Gaussian cases
// go through exp(); IDENTITY/RELU/LEAKYRELU return before the exp step.
void calc_activ_func(Mat& sums, const Mat& w) const
{
    const double* bias = w.ptr<double>(w.rows - 1);
    int i, j, n = sums.rows, cols = sums.cols;
    double scale = 0, scale2 = f_param2;

    // pre-scaling applied together with the bias
    switch (activ_func)
    {
    case IDENTITY:
        scale = 1.;
        break;
    case SIGMOID_SYM:
        scale = -f_param1;  // negated so exp() computes exp(-f_param1*x)
        break;
    case GAUSSIAN:
        scale = -f_param1*f_param1;
        break;
    case RELU:
        scale = 1;
        break;
    case LEAKYRELU:
        scale = 1;
        break;
    default:
        ;
    }

    CV_Assert(sums.isContinuous());

    if (activ_func != GAUSSIAN)
    {
        for (i = 0; i < n; i++)
        {
            double* data = sums.ptr<double>(i);
            for (j = 0; j < cols; j++)
            {
                data[j] = (data[j] + bias[j])*scale;
                if (activ_func == RELU)
                    if (data[j] < 0)
                        data[j] = 0;
                if (activ_func == LEAKYRELU)
                    if (data[j] < 0)
                        data[j] *= f_param1;
            }
        }

        // the piecewise-linear activations are complete at this point
        if (activ_func == IDENTITY || activ_func == RELU || activ_func == LEAKYRELU)
            return;
    }
    else
    {
        for (i = 0; i < n; i++)
        {
            double* data = sums.ptr<double>(i);
            for (j = 0; j < cols; j++)
            {
                double t = data[j] + bias[j];
                data[j] = t*t*scale;  // -(f_param1*(x+b))^2, exponentiated below
            }
        }
    }

    exp(sums, sums);

    // a continuous matrix can be traversed as one long row
    if (sums.isContinuous())
    {
        cols *= n;
        n = 1;
    }

    switch (activ_func)
    {
    case SIGMOID_SYM:
        for (i = 0; i < n; i++)
        {
            double* data = sums.ptr<double>(i);
            for (j = 0; j < cols; j++)
            {
                // symmetric sigmoid f_param2*(1-e)/(1+e), e = exp(-f_param1*x)
                if (!cvIsInf(data[j]))
                {
                    double t = scale2*(1. - data[j]) / (1. + data[j]);
                    data[j] = t;
                }
                else
                {
                    // exp() overflowed: saturate at the negative asymptote
                    data[j] = -scale2;
                }
            }
        }
        break;

    case GAUSSIAN:
        for (i = 0; i < n; i++)
        {
            double* data = sums.ptr<double>(i);
            for (j = 0; j < cols; j++)
                data[j] = scale2*data[j];
        }
        break;

    default:
        ;
    }
}
// Compute, for training, both the activation values (_xf, transformed
// in-place from pre-activations plus the bias row of 'w') and the activation
// derivative (_df). The final 'else' branch handles SIGMOID_SYM.
void calc_activ_func_deriv(Mat& _xf, Mat& _df, const Mat& w) const
{
    const double* bias = w.ptr<double>(w.rows - 1);
    int i, j, n = _xf.rows, cols = _xf.cols;

    if (activ_func == IDENTITY)
    {
        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
            {
                xf[j] += bias[j];
                df[j] = 1;
            }
        }
    }
    else if (activ_func == RELU)
    {
        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
            {
                xf[j] += bias[j];
                if (xf[j] < 0)
                {
                    xf[j] = 0;
                    df[j] = 0;
                }
                else
                    df[j] = 1;
            }
        }
    }
    else if (activ_func == LEAKYRELU)
    {
        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
            {
                xf[j] += bias[j];
                if (xf[j] < 0)
                {
                    // negative side has constant slope f_param1
                    xf[j] = f_param1*xf[j];
                    df[j] = f_param1;
                }
                else
                    df[j] = 1;
            }
        }
    }
    else if (activ_func == GAUSSIAN)
    {
        double scale = -f_param1*f_param1;
        double scale2 = scale*f_param2;
        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
            {
                double t = xf[j] + bias[j];
                df[j] = t * 2 * scale2;   // chain-rule factor, exp applied below
                xf[j] = t*t*scale;
            }
        }
        exp(_xf, _xf);

        // complete the derivative: df = 2*scale2*t*exp(scale*t^2)
        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
                df[j] *= xf[j];
        }
    }
    else
    {
        // SIGMOID_SYM
        double scale = f_param1;
        double scale2 = f_param2;

        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
            {
                xf[j] = (xf[j] + bias[j])*scale;
                df[j] = -fabs(xf[j]);  // exponent for exp(-|a*x|) below
            }
        }

        exp(_df, _df);

        // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
        // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
        // 2*a*exp(-ax)/(1+exp(-ax))^2
        scale *= 2 * f_param2;
        for (i = 0; i < n; i++)
        {
            double* xf = _xf.ptr<double>(i);
            double* df = _df.ptr<double>(i);

            for (j = 0; j < cols; j++)
            {
                // s0 restores the sign lost by the fabs() above
                int s0 = xf[j] > 0 ? 1 : -1;
                double t0 = 1. / (1. + df[j]);
                double t1 = scale*df[j] * t0*t0;
                t0 *= scale2*(1. - df[j])*s0;
                df[j] = t1;
                xf[j] = t0;
            }
        }
    }
}
// Compute per-feature input scaling, stored in weights[0] as interleaved
// (scale, shift) pairs. When training from scratch (no UPDATE_WEIGHTS) each
// feature is standardized to zero mean / unit variance unless NO_INPUT_SCALE
// is set; in update mode the existing scaling is kept untouched.
void calc_input_scale( const Mat& inputs, int flags )
{
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_INPUT_SCALE) != 0;
    double* scale = weights[0].ptr<double>();
    int count = inputs.rows;

    if( reset_weights )
    {
        int i, j, vcount = layer_sizes[0];
        int type = inputs.type();
        double a = no_scale ? 1. : 0.;

        // identity scaling when disabled, zeroed accumulators otherwise
        for( j = 0; j < vcount; j++ )
            scale[2*j] = a, scale[j*2+1] = 0.;

        if( no_scale )
            return;

        // accumulate per-feature sum and sum of squares
        for( i = 0; i < count; i++ )
        {
            const uchar* p = inputs.ptr(i);
            const float* f = (const float*)p;
            const double* d = (const double*)p;
            for( j = 0; j < vcount; j++ )
            {
                double t = type == CV_32F ? (double)f[j] : d[j];
                scale[j*2] += t;
                scale[j*2+1] += t*t;
            }
        }

        // convert the accumulators into (1/sigma, -mean/sigma) pairs
        for( j = 0; j < vcount; j++ )
        {
            double s = scale[j*2], s2 = scale[j*2+1];
            double m = s/count, sigma2 = s2/count - m*m;
            scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
            scale[j*2+1] = -m*scale[j*2];
        }
    }
}
// Compute the output scaling (weights[l_count], maps net activations to the
// target range) and its inverse (weights[l_count+1]). When training from
// scratch the observed min/max of each output column is mapped onto
// [min_val, max_val]; in update mode new targets are only verified to stay
// within the tolerated range [min_val1, max_val1].
void calc_output_scale( const Mat& outputs, int flags )
{
    int i, j, vcount = layer_sizes.back();
    int type = outputs.type();
    double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
    int l_count = layer_count();
    double* scale = weights[l_count].ptr<double>();
    double* inv_scale = weights[l_count+1].ptr<double>();
    int count = outputs.rows;

    if( reset_weights )
    {
        // identity transform when scaling is disabled; otherwise seed the
        // min/max accumulators with +/-DBL_MAX
        double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;

        for( j = 0; j < vcount; j++ )
        {
            scale[2*j] = inv_scale[2*j] = a0;
            scale[j*2+1] = inv_scale[2*j+1] = b0;
        }

        if( no_scale )
            return;
    }

    for( i = 0; i < count; i++ )
    {
        const uchar* p = outputs.ptr(i);
        const float* f = (const float*)p;
        const double* d = (const double*)p;

        for( j = 0; j < vcount; j++ )
        {
            double t = type == CV_32F ? (double)f[j] : d[j];

            if( reset_weights )
            {
                // track per-column min (scale[j*2]) and max (scale[j*2+1])
                double mj = scale[j*2], Mj = scale[j*2+1];
                if( mj > t ) mj = t;
                if( Mj < t ) Mj = t;

                scale[j*2] = mj;
                scale[j*2+1] = Mj;
            }
            else if( !no_scale )
            {
                // update mode: new targets must fit the original range
                t = t*inv_scale[j*2] + inv_scale[2*j+1];
                if( t < m1 || t > M1 )
                    CV_Error( CV_StsOutOfRange,
                              "Some of new output training vector components run exceed the original range too much" );
            }
        }
    }

    if( reset_weights )
        for( j = 0; j < vcount; j++ )
        {
            // map mj..Mj to m..M
            double mj = scale[j*2], Mj = scale[j*2+1];
            double a, b;
            double delta = Mj - mj;
            if( delta < DBL_EPSILON )
                a = 1, b = (M + m - Mj - mj)*0.5;
            else
                a = (M - m)/delta, b = m - mj*a;
            inv_scale[j*2] = a; inv_scale[j*2+1] = b;
            a = 1./a; b = -b*a;
            scale[j*2] = a; scale[j*2+1] = b;
        }
}
// Validate the training matrices against the configured topology, normalize
// the sample weights so they sum to 1, and (re)compute the input/output
// scaling. Throws on topology/type/size mismatches.
void prepare_to_train( const Mat& inputs, const Mat& outputs,
                       Mat& sample_weights, int flags )
{
    if( layer_sizes.empty() )
        CV_Error( CV_StsError,
                  "The network has not been created. Use method create or the appropriate constructor" );

    if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
        inputs.cols != layer_sizes[0] )
        CV_Error( CV_StsBadArg,
                  "input training data should be a floating-point matrix with "
                  "the number of rows equal to the number of training samples and "
                  "the number of columns equal to the size of 0-th (input) layer" );

    if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
        outputs.cols != layer_sizes.back() )
        CV_Error( CV_StsBadArg,
                  "output training data should be a floating-point matrix with "
                  "the number of rows equal to the number of training samples and "
                  "the number of columns equal to the size of last (output) layer" );

    if( inputs.rows != outputs.rows )
        CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );

    // normalize the sample weights to sum to 1 (and convert to CV_64F)
    Mat temp;
    double s = sum(sample_weights)[0];
    sample_weights.convertTo(temp, CV_64F, 1./s);
    sample_weights = temp;

    calc_input_scale( inputs, flags );
    calc_output_scale( outputs, flags );
}
// Train the network with the currently selected algorithm. Returns true if
// at least one iteration completed. Without UPDATE_WEIGHTS the weights are
// re-initialized (Nguyen-Widrow) before training.
bool train( const Ptr<TrainData>& trainData, int flags ) CV_OVERRIDE
{
    const int MAX_ITER = 1000;
    const double DEFAULT_EPSILON = FLT_EPSILON;

    // initialize training data
    Mat inputs = trainData->getTrainSamples();
    Mat outputs = trainData->getTrainResponses();
    Mat sw = trainData->getTrainSampleWeights();
    prepare_to_train( inputs, outputs, sw, flags );

    // ... and link weights
    if( !(flags & UPDATE_WEIGHTS) )
        init_weights();

    // fill unspecified termination criteria with sane defaults
    TermCriteria termcrit;
    termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
    termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
    termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);

    int iter = 0;
    switch(params.trainMethod){
    case ANN_MLP::BACKPROP:
        iter = train_backprop(inputs, outputs, sw, termcrit);
        break;
    case ANN_MLP::RPROP:
        iter = train_rprop(inputs, outputs, sw, termcrit);
        break;
    case ANN_MLP::ANNEAL:
        iter = train_anneal(trainData);
        break;
    }
    trained = iter > 0;
    return trained;
}
int train_anneal(const Ptr<TrainData>& trainData)
885
{
886
SimulatedAnnealingANN_MLP s(*this, trainData);
887
trained = true; // Enable call to CalcError
888
int iter = simulatedAnnealingSolver(s, params.initialT, params.finalT, params.coolingRatio, params.itePerStep, NULL, params.rEnergy);
889
trained =false;
890
return iter + 1; // ensure that 'train()' call is always successful
891
}
892
893
// On-line (per-sample) back-propagation training with momentum.
// Samples are reshuffled at the start of every epoch; convergence is checked
// on the change of the accumulated epoch error. Returns the epoch count.
int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
{
    int i, j, k;
    double prev_E = DBL_MAX*0.5, E = 0;
    int itype = inputs.type(), otype = outputs.type();

    int count = inputs.rows;

    // iterations count single samples; the criteria are per-epoch
    int iter = -1, max_iter = termCrit.maxCount*count;
    double epsilon = termCrit.epsilon*count;

    int l_count = layer_count();
    int ivcount = layer_sizes[0];
    int ovcount = layer_sizes.back();

    // allocate buffers
    vector<vector<double> > x(l_count);   // layer activations (+1 slot for the bias input)
    vector<vector<double> > df(l_count);  // activation derivatives
    vector<Mat> dw(l_count);              // per-layer updates (carry the momentum)

    for( i = 0; i < l_count; i++ )
    {
        int n = layer_sizes[i];
        x[i].resize(n+1);
        df[i].resize(n);
        dw[i] = Mat::zeros(weights[i].size(), CV_64F);
    }

    Mat _idx_m(1, count, CV_32S);
    int* _idx = _idx_m.ptr<int>();
    for( i = 0; i < count; i++ )
        _idx[i] = i;

    AutoBuffer<double> _buf(max_lsize*2);
    double* buf[] = { _buf.data(), _buf.data() + max_lsize };

    const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();

    // run back-propagation loop
    /*
       y_i = w_i*x_{i-1}
       x_i = f(y_i)
       E = 1/2*||u - x_N||^2
       grad_N = (x_N - u)*f'(y_i)
       dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
       w_i(t+1) = w_i(t) + dw_i(t)
       grad_{i-1} = w_i^t*grad_i
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int idx = iter % count;
        double sweight = sw ? count*sw[idx] : 1.;

        if( idx == 0 )
        {
            // epoch boundary: check convergence, then reshuffle the samples
            //printf("%d. E = %g\n", iter/count, E);
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
            E = 0;

            // shuffle indices
            for( i = 0; i <count; i++ )
            {
                j = rng.uniform(0, count);
                k = rng.uniform(0, count);
                std::swap(_idx[j], _idx[k]);
            }
        }

        idx = _idx[idx];

        const uchar* x0data_p = inputs.ptr(idx);
        const float* x0data_f = (const float*)x0data_p;
        const double* x0data_d = (const double*)x0data_p;

        // load the sample, applying the input scaling from weights[0]
        double* w = weights[0].ptr<double>();
        for( j = 0; j < ivcount; j++ )
            x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];

        Mat x1( 1, ivcount, CV_64F, &x[0][0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            int n = layer_sizes[i];
            Mat x2(1, n, CV_64F, &x[i][0] );
            Mat _w = weights[i].rowRange(0, x1.cols);
            gemm(x1, _w, 1, noArray(), 0, x2);
            Mat _df(1, n, CV_64F, &df[i][0] );
            calc_activ_func_deriv( x2, _df, weights[i] );
            x1 = x2;
        }

        Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
        w = weights[l_count+1].ptr<double>();

        // calculate error (targets mapped with the inverse output scaling)
        const uchar* udata_p = outputs.ptr(idx);
        const float* udata_f = (const float*)udata_p;
        const double* udata_d = (const double*)udata_p;

        double* gdata = grad1.ptr<double>();
        for( k = 0; k < ovcount; k++ )
        {
            double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
            gdata[k] = t*sweight;
            E += t*t;
        }
        // NOTE(review): this rescales the whole running E (including previous
        // samples' contributions) by this sample's weight; harmless when
        // sample weights are absent (sweight == 1) but looks suspicious
        // otherwise — compare with the per-sample E1 used in RPropLoop. Verify.
        E *= sweight;

        // backward pass, update weights
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
            Mat _df(1, n2, CV_64F, &df[i][0]);
            multiply( grad1, _df, grad1 );
            Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
            x[i-1][n1] = 1.;  // constant bias input
            // dw = bpDWScale * x * grad + bpMomentScale * dw; then w += dw
            gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
            add( weights[i], dw[i], weights[i] );
            if( i > 1 )
            {
                // propagate the gradient to the previous layer (bias row excluded)
                Mat grad2(1, n1, CV_64F, buf[i&1]);
                Mat _w = weights[i].rowRange(0, n1);
                gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
                grad1 = grad2;
            }
        }
    }

    iter /= count;  // convert sample iterations to completed epochs
    return iter;
}
struct RPropLoop : public ParallelLoopBody
1029
{
1030
RPropLoop(ANN_MLPImpl* _ann,
1031
const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
1032
int _dcount0, vector<Mat>& _dEdw, double* _E)
1033
{
1034
ann = _ann;
1035
inputs = _inputs;
1036
outputs = _outputs;
1037
sw = _sw.ptr<double>();
1038
dcount0 = _dcount0;
1039
dEdw = &_dEdw;
1040
pE = _E;
1041
}
1042
1043
ANN_MLPImpl* ann;
1044
vector<Mat>* dEdw;
1045
Mat inputs, outputs;
1046
const double* sw;
1047
int dcount0;
1048
double* pE;
1049
1050
void operator()(const Range& range) const CV_OVERRIDE
1051
{
1052
double inv_count = 1./inputs.rows;
1053
int ivcount = ann->layer_sizes.front();
1054
int ovcount = ann->layer_sizes.back();
1055
int itype = inputs.type(), otype = outputs.type();
1056
int count = inputs.rows;
1057
int i, j, k, l_count = ann->layer_count();
1058
vector<vector<double> > x(l_count);
1059
vector<vector<double> > df(l_count);
1060
vector<double> _buf(ann->max_lsize*dcount0*2);
1061
double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
1062
double E = 0;
1063
1064
for( i = 0; i < l_count; i++ )
1065
{
1066
x[i].resize(ann->layer_sizes[i]*dcount0);
1067
df[i].resize(ann->layer_sizes[i]*dcount0);
1068
}
1069
1070
for( int si = range.start; si < range.end; si++ )
1071
{
1072
int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
1073
int dcount = i1 - i0;
1074
const double* w = ann->weights[0].ptr<double>();
1075
1076
// grab and preprocess input data
1077
for( i = 0; i < dcount; i++ )
1078
{
1079
const uchar* x0data_p = inputs.ptr(i0 + i);
1080
const float* x0data_f = (const float*)x0data_p;
1081
const double* x0data_d = (const double*)x0data_p;
1082
1083
double* xdata = &x[0][i*ivcount];
1084
for( j = 0; j < ivcount; j++ )
1085
xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
1086
}
1087
Mat x1(dcount, ivcount, CV_64F, &x[0][0]);
1088
1089
// forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
1090
for( i = 1; i < l_count; i++ )
1091
{
1092
Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
1093
Mat _w = ann->weights[i].rowRange(0, x1.cols);
1094
gemm( x1, _w, 1, noArray(), 0, x2 );
1095
Mat _df( x2.size(), CV_64F, &df[i][0] );
1096
ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
1097
x1 = x2;
1098
}
1099
1100
Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);
1101
1102
w = ann->weights[l_count+1].ptr<double>();
1103
1104
// calculate error
1105
for( i = 0; i < dcount; i++ )
1106
{
1107
const uchar* udata_p = outputs.ptr(i0+i);
1108
const float* udata_f = (const float*)udata_p;
1109
const double* udata_d = (const double*)udata_p;
1110
1111
const double* xdata = &x[l_count-1][i*ovcount];
1112
double* gdata = grad1.ptr<double>(i);
1113
double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
1114
1115
for( j = 0; j < ovcount; j++ )
1116
{
1117
double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
1118
gdata[j] = t*sweight;
1119
E1 += t*t;
1120
}
1121
E += sweight*E1;
1122
}
1123
1124
for( i = l_count-1; i > 0; i-- )
1125
{
1126
int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
1127
Mat _df(dcount, n2, CV_64F, &df[i][0]);
1128
multiply(grad1, _df, grad1);
1129
1130
{
1131
AutoLock lock(ann->mtx);
1132
Mat _dEdw = dEdw->at(i).rowRange(0, n1);
1133
x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
1134
gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);
1135
1136
// update bias part of dEdw
1137
double* dst = dEdw->at(i).ptr<double>(n1);
1138
for( k = 0; k < dcount; k++ )
1139
{
1140
const double* src = grad1.ptr<double>(k);
1141
for( j = 0; j < n2; j++ )
1142
dst[j] += src[j];
1143
}
1144
}
1145
1146
Mat grad2( dcount, n1, CV_64F, buf[i&1] );
1147
if( i > 1 )
1148
{
1149
Mat _w = ann->weights[i].rowRange(0, n1);
1150
gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
1151
}
1152
grad1 = grad2;
1153
}
1154
}
1155
{
1156
AutoLock lock(ann->mtx);
1157
*pE += E;
1158
}
1159
}
1160
};
1161
1162
int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
1163
{
1164
const int max_buf_size = 1 << 16;
1165
int i, iter = -1, count = inputs.rows;
1166
1167
double prev_E = DBL_MAX*0.5;
1168
1169
int max_iter = termCrit.maxCount;
1170
double epsilon = termCrit.epsilon;
1171
double dw_plus = params.rpDWPlus;
1172
double dw_minus = params.rpDWMinus;
1173
double dw_min = params.rpDWMin;
1174
double dw_max = params.rpDWMax;
1175
1176
int l_count = layer_count();
1177
1178
// allocate buffers
1179
vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);
1180
1181
int total = 0;
1182
for( i = 0; i < l_count; i++ )
1183
{
1184
total += layer_sizes[i];
1185
dw[i].create(weights[i].size(), CV_64F);
1186
dw[i].setTo(Scalar::all(params.rpDW0));
1187
prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
1188
dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
1189
}
1190
CV_Assert(total > 0);
1191
int dcount0 = max_buf_size/(2*total);
1192
dcount0 = std::max( dcount0, 1 );
1193
dcount0 = std::min( dcount0, count );
1194
int chunk_count = (count + dcount0 - 1)/dcount0;
1195
1196
// run rprop loop
1197
/*
1198
y_i(t) = w_i(t)*x_{i-1}(t)
1199
x_i(t) = f(y_i(t))
1200
E = sum_over_all_samples(1/2*||u - x_N||^2)
1201
grad_N = (x_N - u)*f'(y_i)
1202
1203
std::min(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
1204
dw_i{jk}(t) = std::max(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
1205
dw_i{jk}(t-1) else
1206
1207
if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
1208
dE/dw_i{jk}(t)<-0
1209
else
1210
w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
1211
grad_{i-1}(t) = w_i^t(t)*grad_i(t)
1212
*/
1213
for( iter = 0; iter < max_iter; iter++ )
1214
{
1215
double E = 0;
1216
1217
for( i = 0; i < l_count; i++ )
1218
dEdw[i].setTo(Scalar::all(0));
1219
1220
// first, iterate through all the samples and compute dEdw
1221
RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
1222
parallel_for_(Range(0, chunk_count), invoker);
1223
//invoker(Range(0, chunk_count));
1224
1225
// now update weights
1226
for( i = 1; i < l_count; i++ )
1227
{
1228
int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1229
for( int k = 0; k <= n1; k++ )
1230
{
1231
CV_Assert(weights[i].size() == Size(n2, n1+1));
1232
double* wk = weights[i].ptr<double>(k);
1233
double* dwk = dw[i].ptr<double>(k);
1234
double* dEdwk = dEdw[i].ptr<double>(k);
1235
schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);
1236
1237
for( int j = 0; j < n2; j++ )
1238
{
1239
double Eval = dEdwk[j];
1240
double dval = dwk[j];
1241
double wval = wk[j];
1242
int s = CV_SIGN(Eval);
1243
int ss = prevEk[j]*s;
1244
if( ss > 0 )
1245
{
1246
dval *= dw_plus;
1247
dval = std::min( dval, dw_max );
1248
dwk[j] = dval;
1249
wk[j] = wval + dval*s;
1250
}
1251
else if( ss < 0 )
1252
{
1253
dval *= dw_minus;
1254
dval = std::max( dval, dw_min );
1255
prevEk[j] = 0;
1256
dwk[j] = dval;
1257
wk[j] = wval + dval*s;
1258
}
1259
else
1260
{
1261
prevEk[j] = (schar)s;
1262
wk[j] = wval + dval*s;
1263
}
1264
dEdwk[j] = 0.;
1265
}
1266
}
1267
}
1268
1269
//printf("%d. E = %g\n", iter, E);
1270
if( fabs(prev_E - E) < epsilon )
1271
break;
1272
prev_E = E;
1273
}
1274
1275
return iter;
1276
}
1277
1278
// Serializes the activation-function settings, the output scaling range
// and the training parameters (method-specific values plus termination
// criteria) into the given file storage.
void write_params( FileStorage& fs ) const
{
    const char* activ_func_name = 0;
    switch( activ_func )
    {
    case IDENTITY:    activ_func_name = "IDENTITY"; break;
    case SIGMOID_SYM: activ_func_name = "SIGMOID_SYM"; break;
    case GAUSSIAN:    activ_func_name = "GAUSSIAN"; break;
    case RELU:        activ_func_name = "RELU"; break;
    case LEAKYRELU:   activ_func_name = "LEAKYRELU"; break;
    default: break;   // unknown ids are stored numerically below
    }

    if( activ_func_name )
        fs << "activation_function" << activ_func_name;
    else
        fs << "activation_function_id" << activ_func;

    // the identity function takes no parameters
    if( activ_func != IDENTITY )
    {
        fs << "f_param1" << f_param1;
        fs << "f_param2" << f_param2;
    }

    fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;

    fs << "training_params" << "{";
    if( params.trainMethod == ANN_MLP::BACKPROP )
    {
        fs << "train_method" << "BACKPROP";
        fs << "dw_scale" << params.bpDWScale;
        fs << "moment_scale" << params.bpMomentScale;
    }
    else if( params.trainMethod == ANN_MLP::RPROP )
    {
        fs << "train_method" << "RPROP";
        fs << "dw0" << params.rpDW0;
        fs << "dw_plus" << params.rpDWPlus;
        fs << "dw_minus" << params.rpDWMinus;
        fs << "dw_min" << params.rpDWMin;
        fs << "dw_max" << params.rpDWMax;
    }
    else if( params.trainMethod == ANN_MLP::ANNEAL )
    {
        fs << "train_method" << "ANNEAL";
        fs << "initialT" << params.initialT;
        fs << "finalT" << params.finalT;
        fs << "coolingRatio" << params.coolingRatio;
        fs << "itePerStep" << params.itePerStep;
    }
    else
        CV_Error(CV_StsError, "Unknown training method");

    // write only the termination components that are actually enabled
    fs << "term_criteria" << "{";
    if( params.termCrit.type & TermCriteria::EPS )
        fs << "epsilon" << params.termCrit.epsilon;
    if( params.termCrit.type & TermCriteria::COUNT )
        fs << "iterations" << params.termCrit.maxCount;
    fs << "}" << "}";
}
1333
1334
// Stores the whole model: layer layout, parameters, the three scaling
// vectors and every inter-layer weight matrix as raw doubles.
void write( FileStorage& fs ) const CV_OVERRIDE
{
    if( layer_sizes.empty() )
        return;                      // nothing to store for an unconfigured net
    int l_count = layer_count();

    writeFormat(fs);
    fs << "layer_sizes" << layer_sizes;
    write_params( fs );

    size_t esz = weights[0].elemSize();

    // weights[0], weights[l_count] and weights[l_count+1] hold the
    // input/output scaling terms rather than layer weights
    fs << "input_scale" << "[";
    fs.writeRaw("d", weights[0].ptr(), weights[0].total()*esz);

    fs << "]" << "output_scale" << "[";
    fs.writeRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);

    fs << "]" << "inv_output_scale" << "[";
    fs.writeRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);

    // the actual layer weights occupy indices 1..l_count-1
    fs << "]" << "weights" << "[";
    for( int i = 1; i < l_count; i++ )
    {
        fs << "[";
        fs.writeRaw("d", weights[i].ptr(), weights[i].total()*esz);
        fs << "]";
    }
    fs << "]";
}
1365
1366
void read_params( const FileNode& fn )
1367
{
1368
String activ_func_name = (String)fn["activation_function"];
1369
if( !activ_func_name.empty() )
1370
{
1371
activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
1372
activ_func_name == "IDENTITY" ? IDENTITY :
1373
activ_func_name == "RELU" ? RELU :
1374
activ_func_name == "LEAKYRELU" ? LEAKYRELU :
1375
activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
1376
CV_Assert( activ_func >= 0 );
1377
}
1378
else
1379
activ_func = (int)fn["activation_function_id"];
1380
1381
f_param1 = (double)fn["f_param1"];
1382
f_param2 = (double)fn["f_param2"];
1383
1384
setActivationFunction( activ_func, f_param1, f_param2);
1385
1386
min_val = (double)fn["min_val"];
1387
max_val = (double)fn["max_val"];
1388
min_val1 = (double)fn["min_val1"];
1389
max_val1 = (double)fn["max_val1"];
1390
1391
FileNode tpn = fn["training_params"];
1392
params = AnnParams();
1393
1394
if( !tpn.empty() )
1395
{
1396
String tmethod_name = (String)tpn["train_method"];
1397
1398
if( tmethod_name == "BACKPROP" )
1399
{
1400
params.trainMethod = ANN_MLP::BACKPROP;
1401
params.bpDWScale = (double)tpn["dw_scale"];
1402
params.bpMomentScale = (double)tpn["moment_scale"];
1403
}
1404
else if (tmethod_name == "RPROP")
1405
{
1406
params.trainMethod = ANN_MLP::RPROP;
1407
params.rpDW0 = (double)tpn["dw0"];
1408
params.rpDWPlus = (double)tpn["dw_plus"];
1409
params.rpDWMinus = (double)tpn["dw_minus"];
1410
params.rpDWMin = (double)tpn["dw_min"];
1411
params.rpDWMax = (double)tpn["dw_max"];
1412
}
1413
else if (tmethod_name == "ANNEAL")
1414
{
1415
params.trainMethod = ANN_MLP::ANNEAL;
1416
params.initialT = (double)tpn["initialT"];
1417
params.finalT = (double)tpn["finalT"];
1418
params.coolingRatio = (double)tpn["coolingRatio"];
1419
params.itePerStep = tpn["itePerStep"];
1420
}
1421
else
1422
CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
1423
1424
FileNode tcn = tpn["term_criteria"];
1425
if( !tcn.empty() )
1426
{
1427
FileNode tcn_e = tcn["epsilon"];
1428
FileNode tcn_i = tcn["iterations"];
1429
params.termCrit.type = 0;
1430
if( !tcn_e.empty() )
1431
{
1432
params.termCrit.type |= TermCriteria::EPS;
1433
params.termCrit.epsilon = (double)tcn_e;
1434
}
1435
if( !tcn_i.empty() )
1436
{
1437
params.termCrit.type |= TermCriteria::COUNT;
1438
params.termCrit.maxCount = (int)tcn_i;
1439
}
1440
}
1441
}
1442
}
1443
1444
// Loads a complete model (layer layout, parameters and raw weights)
// from the file node and marks the model as trained.
void read( const FileNode& fn ) CV_OVERRIDE
{
    clear();

    vector<int> _layer_sizes;
    readVectorOrMat(fn["layer_sizes"], _layer_sizes);
    setLayerSizes( _layer_sizes );   // also (re)allocates the weight matrices

    int l_count = layer_count();
    read_params(fn);

    size_t esz = weights[0].elemSize();

    // the three scaling vectors live outside the [1, l_count) weight range
    fn["input_scale"].readRaw("d", weights[0].ptr(), weights[0].total()*esz);
    fn["output_scale"].readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
    fn["inv_output_scale"].readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);

    // each layer's weight matrix was written as its own raw sequence
    FileNodeIterator w_it = fn["weights"].begin();
    for( int i = 1; i < l_count; i++, ++w_it )
        (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz);

    trained = true;
}
1472
1473
// Returns the weight matrix stored for the given layer index.
Mat getWeights(int layerIdx) const CV_OVERRIDE
{
    CV_Assert( layerIdx >= 0 && layerIdx < (int)weights.size() );
    return weights[layerIdx];
}
1478
1479
// Reports whether the model currently holds trained weights
// (set by a successful training run or by read()).
bool isTrained() const CV_OVERRIDE { return trained; }
1483
1484
// This model never reports itself as a classifier to the ML API.
bool isClassifier() const CV_OVERRIDE { return false; }
1488
1489
// Number of input variables, i.e. the size of the first layer
// (0 when no layout has been set yet).
int getVarCount() const CV_OVERRIDE
{
    if( layer_sizes.empty() )
        return 0;
    return layer_sizes[0];
}
1493
1494
// Default top-level node name used when this model is serialized.
String getDefaultName() const CV_OVERRIDE { return "opencv_ml_ann_mlp"; }
1498
1499
vector<int> layer_sizes;   // neuron count per layer, input layer first
vector<Mat> weights;       // [0]=input scale, [1..l_count-1]=layer weights,
                           // [l_count]=output scale, [l_count+1]=inverse output scale
double f_param1, f_param2; // activation function parameters (see write_params/read_params)
double min_val, max_val, min_val1, max_val1; // output scaling range, serialized with the model
int activ_func;            // activation function id (IDENTITY, SIGMOID_SYM, GAUSSIAN, RELU, LEAKYRELU)
int max_lsize, max_buf_sz; // NOTE(review): presumably cached layer/buffer size limits -- confirm at the init site
AnnParams params;          // training method and its parameters
RNG rng;                   // random generator; presumably used for weight initialization -- not exercised in this chunk
Mutex mtx;                 // guards the shared accumulators (dEdw, E) during parallel RPROP training
bool trained;              // true once the model holds usable weights (set by read())
1509
};
1510
1511
1512
1513
1514
// Factory: creates an empty (untrained, unconfigured) MLP instance.
Ptr<ANN_MLP> ANN_MLP::create()
{
    Ptr<ANN_MLPImpl> impl = makePtr<ANN_MLPImpl>();
    return impl;
}
1518
1519
// Loads a serialized ANN_MLP model from the given file path.
// Fails (via CV_Assert) if the file cannot be opened for reading.
Ptr<ANN_MLP> ANN_MLP::load(const String& filepath)
{
    FileStorage fs;
    fs.open(filepath, FileStorage::READ);
    CV_Assert(fs.isOpened());
    // Keep the concrete type so read() is reachable directly, instead of
    // the C-style downcast through Ptr<ANN_MLP>::get() the code used before.
    Ptr<ANN_MLPImpl> ann = makePtr<ANN_MLPImpl>();
    ann->read(fs.getFirstTopLevelNode());
    return ann;
}
1528
1529
}}
1530
1531
/* End of file. */
1532
1533