// GitHub Repository: Tetragramm/opencv
// Path: blob/master/apps/traincascade/boost.cpp
1
#include "opencv2/core.hpp"
2
#include "opencv2/core/utility.hpp"
3
4
using cv::Size;
5
using cv::Mat;
6
using cv::Point;
7
using cv::FileStorage;
8
using cv::Rect;
9
using cv::Ptr;
10
using cv::FileNode;
11
using cv::Mat_;
12
using cv::Range;
13
using cv::FileNodeIterator;
14
using cv::ParallelLoopBody;
15
30
#include "boost.h"
31
#include "cascadeclassifier.h"
32
#include <queue>
33
34
#include "cvconfig.h"
35
36
using namespace std;
37
38
static inline double
39
logRatio( double val )
40
{
41
const double eps = 1e-5;
42
43
val = max( val, eps );
44
val = min( val, 1. - eps );
45
return log( val/(1. - val) );
46
}
47
48
template<typename T, typename Idx>
49
class LessThanIdx
50
{
51
public:
52
LessThanIdx( const T* _arr ) : arr(_arr) {}
53
bool operator()(Idx a, Idx b) const { return arr[a] < arr[b]; }
54
const T* arr;
55
};
56
57
static inline int cvAlign( int size, int align )
58
{
59
CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
60
return (size + align - 1) & -align;
61
}
62
63
#define CV_THRESHOLD_EPS (0.00001F)
64
65
static const int MinBlockSize = 1 << 16;
66
static const int BlockSizeDelta = 1 << 10;
67
68
// TODO remove this code duplication with ml/precomp.hpp
69
70
static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
71
{
72
return *(const int*)a - *(const int*)b;
73
}
74
75
static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
76
{
77
CvMat* idx = 0;
78
79
CV_FUNCNAME( "cvPreprocessIndexArray" );
80
81
__CV_BEGIN__;
82
83
int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
84
uchar* srcb = 0;
85
int* srci = 0;
86
int* dsti;
87
88
if( !CV_IS_MAT(idx_arr) )
89
CV_ERROR( CV_StsBadArg, "Invalid index array" );
90
91
if( idx_arr->rows != 1 && idx_arr->cols != 1 )
92
CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
93
94
idx_total = idx_arr->rows + idx_arr->cols - 1;
95
srcb = idx_arr->data.ptr;
96
srci = idx_arr->data.i;
97
98
type = CV_MAT_TYPE(idx_arr->type);
99
step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
100
101
switch( type )
102
{
103
case CV_8UC1:
104
case CV_8SC1:
105
// idx_arr is array of 1's and 0's -
106
// i.e. it is a mask of the selected components
107
if( idx_total != data_arr_size )
108
CV_ERROR( CV_StsUnmatchedSizes,
109
"Component mask should contain as many elements as the total number of input variables" );
110
111
for( i = 0; i < idx_total; i++ )
112
idx_selected += srcb[i*step] != 0;
113
114
if( idx_selected == 0 )
115
CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
116
117
break;
118
case CV_32SC1:
119
// idx_arr is array of integer indices of selected components
120
if( idx_total > data_arr_size )
121
CV_ERROR( CV_StsOutOfRange,
122
"index array may not contain more elements than the total number of input variables" );
123
idx_selected = idx_total;
124
// check if sorted already
125
for( i = 0; i < idx_total; i++ )
126
{
127
int val = srci[i*step];
128
if( val >= prev )
129
{
130
is_sorted = 0;
131
break;
132
}
133
prev = val;
134
}
135
break;
136
default:
137
CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
138
"(it should be 8uC1, 8sC1 or 32sC1)" );
139
}
140
141
CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
142
dsti = idx->data.i;
143
144
if( type < CV_32SC1 )
145
{
146
for( i = 0; i < idx_total; i++ )
147
if( srcb[i*step] )
148
*dsti++ = i;
149
}
150
else
151
{
152
for( i = 0; i < idx_total; i++ )
153
dsti[i] = srci[i*step];
154
155
if( !is_sorted )
156
qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
157
158
if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
159
CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
160
161
if( check_for_duplicates )
162
{
163
for( i = 1; i < idx_total; i++ )
164
if( dsti[i] <= dsti[i-1] )
165
CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
166
}
167
}
168
169
__CV_END__;
170
171
if( cvGetErrStatus() < 0 )
172
cvReleaseMat( &idx );
173
174
return idx;
175
}
176
177
//----------------------------- CascadeBoostParams -------------------------------------------------
178
179
CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
180
{
181
boost_type = CvBoost::GENTLE;
182
use_surrogates = use_1se_rule = truncate_pruned_tree = false;
183
}
184
185
CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
186
float _minHitRate, float _maxFalseAlarm,
187
double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
188
CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
189
{
190
boost_type = CvBoost::GENTLE;
191
minHitRate = _minHitRate;
192
maxFalseAlarm = _maxFalseAlarm;
193
use_surrogates = use_1se_rule = truncate_pruned_tree = false;
194
}
195
196
void CvCascadeBoostParams::write( FileStorage &fs ) const
197
{
198
string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
199
boost_type == CvBoost::REAL ? CC_REAL_BOOST :
200
boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
201
boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
202
CV_Assert( !boostTypeStr.empty() );
203
fs << CC_BOOST_TYPE << boostTypeStr;
204
fs << CC_MINHITRATE << minHitRate;
205
fs << CC_MAXFALSEALARM << maxFalseAlarm;
206
fs << CC_TRIM_RATE << weight_trim_rate;
207
fs << CC_MAX_DEPTH << max_depth;
208
fs << CC_WEAK_COUNT << weak_count;
209
}
210
211
bool CvCascadeBoostParams::read( const FileNode &node )
212
{
213
string boostTypeStr;
214
FileNode rnode = node[CC_BOOST_TYPE];
215
rnode >> boostTypeStr;
216
boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
217
!boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
218
!boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
219
!boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
220
if (boost_type == -1)
221
CV_Error( CV_StsBadArg, "unsupported Boost type" );
222
node[CC_MINHITRATE] >> minHitRate;
223
node[CC_MAXFALSEALARM] >> maxFalseAlarm;
224
node[CC_TRIM_RATE] >> weight_trim_rate ;
225
node[CC_MAX_DEPTH] >> max_depth ;
226
node[CC_WEAK_COUNT] >> weak_count ;
227
if ( minHitRate <= 0 || minHitRate > 1 ||
228
maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
229
weight_trim_rate <= 0 || weight_trim_rate > 1 ||
230
max_depth <= 0 || weak_count <= 0 )
231
CV_Error( CV_StsBadArg, "bad parameters range");
232
return true;
233
}
234
235
void CvCascadeBoostParams::printDefaults() const
236
{
237
cout << "--boostParams--" << endl;
238
cout << " [-bt <{" << CC_DISCRETE_BOOST << ", "
239
<< CC_REAL_BOOST << ", "
240
<< CC_LOGIT_BOOST ", "
241
<< CC_GENTLE_BOOST << "(default)}>]" << endl;
242
cout << " [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
243
cout << " [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
244
cout << " [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
245
cout << " [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
246
cout << " [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
247
}
248
249
void CvCascadeBoostParams::printAttrs() const
250
{
251
string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
252
boost_type == CvBoost::REAL ? CC_REAL_BOOST :
253
boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
254
boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
255
CV_Assert( !boostTypeStr.empty() );
256
cout << "boostType: " << boostTypeStr << endl;
257
cout << "minHitRate: " << minHitRate << endl;
258
cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
259
cout << "weightTrimRate: " << weight_trim_rate << endl;
260
cout << "maxDepth: " << max_depth << endl;
261
cout << "maxWeakCount: " << weak_count << endl;
262
}
263
264
bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
265
{
266
bool res = true;
267
268
if( !prmName.compare( "-bt" ) )
269
{
270
boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
271
!val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
272
!val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
273
!val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
274
if (boost_type == -1)
275
res = false;
276
}
277
else if( !prmName.compare( "-minHitRate" ) )
278
{
279
minHitRate = (float) atof( val.c_str() );
280
}
281
else if( !prmName.compare( "-maxFalseAlarmRate" ) )
282
{
283
maxFalseAlarm = (float) atof( val.c_str() );
284
}
285
else if( !prmName.compare( "-weightTrimRate" ) )
286
{
287
weight_trim_rate = (float) atof( val.c_str() );
288
}
289
else if( !prmName.compare( "-maxDepth" ) )
290
{
291
max_depth = atoi( val.c_str() );
292
}
293
else if( !prmName.compare( "-maxWeakCount" ) )
294
{
295
weak_count = atoi( val.c_str() );
296
}
297
else
298
res = false;
299
300
return res;
301
}
302
303
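// Builds the root CvDTreeNode for the sample subset selected by _subsample_idx.
// When _subsample_idx is absent or simply enumerates every sample in order, the
// existing data_root is copied; otherwise the precalculated sorted-index buffers,
// cv_labels and sample indices in buf are repacked for the selected subset.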
CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
304
{
305
CvDTreeNode* root = 0;
306
CvMat* isubsample_idx = 0;
307
CvMat* subsample_co = 0;
308
309
bool isMakeRootCopy = true;
310
311
if( !data_root )
312
CV_Error( CV_StsError, "No training data has been set" );
313
314
if( _subsample_idx )
315
{
316
CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );
317
318
if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
319
{
320
const int* sidx = isubsample_idx->data.i;
321
for( int i = 0; i < sample_count; i++ )
322
{
323
if( sidx[i] != i )
324
{
325
isMakeRootCopy = false;
326
break;
327
}
328
}
329
}
330
else
331
isMakeRootCopy = false;
332
}
333
334
if( isMakeRootCopy )
335
{
336
// make a copy of the root node
337
CvDTreeNode temp;
338
int i;
339
root = new_node( 0, 1, 0, 0 );
340
temp = *root;
341
*root = *data_root;
342
root->num_valid = temp.num_valid;
343
if( root->num_valid )
344
{
345
for( i = 0; i < var_count; i++ )
346
root->num_valid[i] = data_root->num_valid[i];
347
}
348
root->cv_Tn = temp.cv_Tn;
349
root->cv_node_risk = temp.cv_node_risk;
350
root->cv_node_error = temp.cv_node_error;
351
}
352
else
353
{
354
int* sidx = isubsample_idx->data.i;
355
// co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
356
int* co, cur_ofs = 0;
357
int workVarCount = get_work_var_count();
358
int count = isubsample_idx->rows + isubsample_idx->cols - 1;
359
360
root = new_node( 0, count, 1, 0 );
361
362
CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
363
cvZero( subsample_co );
364
co = subsample_co->data.i;
365
for( int i = 0; i < count; i++ )
366
co[sidx[i]*2]++;
367
for( int i = 0; i < sample_count; i++ )
368
{
369
if( co[i*2] )
370
{
371
co[i*2+1] = cur_ofs;
372
cur_ofs += co[i*2];
373
}
374
else
375
co[i*2+1] = -1;
376
}
377
378
cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
379
// subsample ordered variables
380
for( int vi = 0; vi < numPrecalcIdx; vi++ )
381
{
382
int ci = get_var_type(vi);
383
CV_Assert( ci < 0 );
384
385
int *src_idx_buf = (int*)inn_buf.data();
386
float *src_val_buf = (float*)(src_idx_buf + sample_count);
387
int* sample_indices_buf = (int*)(src_val_buf + sample_count);
388
const int* src_idx = 0;
389
const float* src_val = 0;
390
get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );
391
392
int j = 0, idx, count_i;
393
int num_valid = data_root->get_num_valid(vi);
394
CV_Assert( num_valid == sample_count );
395
396
if (is_buf_16u)
397
{
398
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
399
(size_t)vi*sample_count + data_root->offset);
400
for( int i = 0; i < num_valid; i++ )
401
{
402
idx = src_idx[i];
403
count_i = co[idx*2];
404
if( count_i )
405
for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
406
udst_idx[j] = (unsigned short)cur_ofs;
407
}
408
}
409
else
410
{
411
int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
412
(size_t)vi*sample_count + root->offset;
413
for( int i = 0; i < num_valid; i++ )
414
{
415
idx = src_idx[i];
416
count_i = co[idx*2];
417
if( count_i )
418
for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
419
idst_idx[j] = cur_ofs;
420
}
421
}
422
}
423
424
// subsample cv_labels
425
const int* src_lbls = get_cv_labels(data_root, (int*)inn_buf.data());
426
if (is_buf_16u)
427
{
428
unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
429
(size_t)(workVarCount-1)*sample_count + root->offset);
430
for( int i = 0; i < count; i++ )
431
udst[i] = (unsigned short)src_lbls[sidx[i]];
432
}
433
else
434
{
435
int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
436
(size_t)(workVarCount-1)*sample_count + root->offset;
437
for( int i = 0; i < count; i++ )
438
idst[i] = src_lbls[sidx[i]];
439
}
440
441
// subsample sample_indices
442
const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
443
if (is_buf_16u)
444
{
445
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
446
(size_t)workVarCount*sample_count + root->offset);
447
for( int i = 0; i < count; i++ )
448
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
449
}
450
else
451
{
452
int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
453
(size_t)workVarCount*sample_count + root->offset;
454
for( int i = 0; i < count; i++ )
455
sample_idx_dst[i] = sample_idx_src[sidx[i]];
456
}
457
458
for( int vi = 0; vi < var_count; vi++ )
459
root->set_num_valid(vi, count);
460
}
461
462
cvReleaseMat( &isubsample_idx );
463
cvReleaseMat( &subsample_co );
464
465
return root;
466
}
467
468
//---------------------------- CascadeBoostTrainData -----------------------------
469
470
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
471
const CvDTreeParams& _params )
472
{
473
is_classifier = true;
474
var_all = var_count = (int)_featureEvaluator->getNumFeatures();
475
476
featureEvaluator = _featureEvaluator;
477
shared = true;
478
set_params( _params );
479
max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
480
var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
481
if ( featureEvaluator->getMaxCatCount() > 0 )
482
{
483
numPrecalcIdx = 0;
484
cat_var_count = var_count;
485
ord_var_count = 0;
486
for( int vi = 0; vi < var_count; vi++ )
487
{
488
var_type->data.i[vi] = vi;
489
}
490
}
491
else
492
{
493
cat_var_count = 0;
494
ord_var_count = var_count;
495
for( int vi = 1; vi <= var_count; vi++ )
496
{
497
var_type->data.i[vi-1] = -vi;
498
}
499
}
500
var_type->data.i[var_count] = cat_var_count;
501
var_type->data.i[var_count+1] = cat_var_count+1;
502
503
int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
504
int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
505
treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
506
tree_storage = cvCreateMemStorage( treeBlockSize );
507
node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
508
split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
509
}
510
511
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
512
int _numSamples,
513
int _precalcValBufSize, int _precalcIdxBufSize,
514
const CvDTreeParams& _params )
515
{
516
setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
517
}
518
519
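// Allocates the per-stage training buffers: chooses 16-bit index storage when
// sample_count < 65536, converts the precalc buffer sizes (given in MB) into the
// number of features whose values / sorted indices are cached, and creates the
// CvDTreeTrainData storages (buf, valCache, cat_count, tree/node/split heaps).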
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
520
int _numSamples,
521
int _precalcValBufSize, int _precalcIdxBufSize,
522
const CvDTreeParams& _params )
523
{
524
int* idst = 0;
525
unsigned short* udst = 0;
526
527
uint64 effective_buf_size = 0;
528
int effective_buf_height = 0, effective_buf_width = 0;
529
530
531
clear();
532
shared = true;
533
have_labels = true;
534
have_priors = false;
535
is_classifier = true;
536
537
rng = &cv::theRNG();
538
539
set_params( _params );
540
541
CV_Assert( _featureEvaluator );
542
featureEvaluator = _featureEvaluator;
543
544
max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
545
_resp = cvMat(featureEvaluator->getCls());
546
responses = &_resp;
547
// TODO: check responses: elements must be 0 or 1
548
549
if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
550
CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );
551
552
var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
553
sample_count = _numSamples;
554
555
is_buf_16u = false;
556
if (sample_count < 65536)
557
is_buf_16u = true;
558
559
numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
560
numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
561
((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );
562
563
assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );
564
565
valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
566
var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
567
568
if ( featureEvaluator->getMaxCatCount() > 0 )
569
{
570
numPrecalcIdx = 0;
571
cat_var_count = var_count;
572
ord_var_count = 0;
573
for( int vi = 0; vi < var_count; vi++ )
574
{
575
var_type->data.i[vi] = vi;
576
}
577
}
578
else
579
{
580
cat_var_count = 0;
581
ord_var_count = var_count;
582
for( int vi = 1; vi <= var_count; vi++ )
583
{
584
var_type->data.i[vi-1] = -vi;
585
}
586
}
587
var_type->data.i[var_count] = cat_var_count;
588
var_type->data.i[var_count+1] = cat_var_count+1;
589
work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_labels*/;
590
buf_count = 2;
591
592
buf_size = -1; // the member buf_size is obsolete
593
594
effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
595
effective_buf_width = sample_count;
596
effective_buf_height = work_var_count+1;
597
598
if (effective_buf_width >= effective_buf_height)
599
effective_buf_height *= buf_count;
600
else
601
effective_buf_width *= buf_count;
602
603
if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
604
{
605
CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
606
}
607
608
if ( is_buf_16u )
609
buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
610
else
611
buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );
612
613
cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
614
615
// precalculate valCache and set indices in buf
616
precalculate();
617
618
// now calculate the maximum size of split,
619
// create memory storage that will keep nodes and splits of the decision tree
620
// allocate root node and the buffer for the whole training data
621
int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
622
(MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
623
int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
624
treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
625
tree_storage = cvCreateMemStorage( treeBlockSize );
626
node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );
627
628
int nvSize = var_count*sizeof(int);
629
nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
630
int tempBlockSize = nvSize;
631
tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
632
temp_storage = cvCreateMemStorage( tempBlockSize );
633
nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );
634
635
data_root = new_node( 0, sample_count, 0, 0 );
636
637
// set sample labels
638
if (is_buf_16u)
639
udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count);
640
else
641
idst = buf->data.i + (size_t)work_var_count*sample_count;
642
643
for (int si = 0; si < sample_count; si++)
644
{
645
if (udst)
646
udst[si] = (unsigned short)si;
647
else
648
idst[si] = si;
649
}
650
for( int vi = 0; vi < var_count; vi++ )
651
data_root->set_num_valid(vi, sample_count);
652
for( int vi = 0; vi < cat_var_count; vi++ )
653
cat_count->data.i[vi] = max_c_count;
654
655
cat_count->data.i[cat_var_count] = 2;
656
657
maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
658
(MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
659
split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );
660
661
priors = cvCreateMat( 1, get_num_classes(), CV_64F );
662
cvSet(priors, cvScalar(1));
663
priors_mult = cvCloneMat( priors );
664
counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
665
direction = cvCreateMat( 1, sample_count, CV_8UC1 );
666
split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
667
}
668
669
void CvCascadeBoostTrainData::free_train_data()
670
{
671
CvDTreeTrainData::free_train_data();
672
valCache.release();
673
}
674
675
const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
676
{
677
int nodeSampleCount = n->sample_count;
678
int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
679
680
int* sampleIndicesBuf = labelsBuf; //
681
const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
682
for( int si = 0; si < nodeSampleCount; si++ )
683
{
684
int sidx = sampleIndices[si];
685
labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
686
}
687
return labelsBuf;
688
}
689
690
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
691
{
692
return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
693
}
694
695
const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
696
{
697
return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
698
}
699
700
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
701
const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
702
{
703
int nodeSampleCount = n->sample_count;
704
const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
705
706
if ( vi < numPrecalcIdx )
707
{
708
if( !is_buf_16u )
709
*sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset;
710
else
711
{
712
const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
713
(size_t)vi*sample_count + n->offset );
714
for( int i = 0; i < nodeSampleCount; i++ )
715
sortedIndicesBuf[i] = shortIndices[i];
716
717
*sortedIndices = sortedIndicesBuf;
718
}
719
720
if( vi < numPrecalcVal )
721
{
722
for( int i = 0; i < nodeSampleCount; i++ )
723
{
724
int idx = (*sortedIndices)[i];
725
idx = sampleIndices[idx];
726
ordValuesBuf[i] = valCache.at<float>( vi, idx);
727
}
728
}
729
else
730
{
731
for( int i = 0; i < nodeSampleCount; i++ )
732
{
733
int idx = (*sortedIndices)[i];
734
idx = sampleIndices[idx];
735
ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
736
}
737
}
738
}
739
else // vi >= numPrecalcIdx
740
{
741
cv::AutoBuffer<float> abuf(nodeSampleCount);
742
float* sampleValues = &abuf[0];
743
744
if ( vi < numPrecalcVal )
745
{
746
for( int i = 0; i < nodeSampleCount; i++ )
747
{
748
sortedIndicesBuf[i] = i;
749
sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
750
}
751
}
752
else
753
{
754
for( int i = 0; i < nodeSampleCount; i++ )
755
{
756
sortedIndicesBuf[i] = i;
757
sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
758
}
759
}
760
std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) );
761
for( int i = 0; i < nodeSampleCount; i++ )
762
ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
763
*sortedIndices = sortedIndicesBuf;
764
}
765
766
*ordValues = ordValuesBuf;
767
}
768
769
const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
770
{
771
int nodeSampleCount = n->sample_count;
772
int* sampleIndicesBuf = catValuesBuf; //
773
const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
774
775
if ( vi < numPrecalcVal )
776
{
777
for( int i = 0; i < nodeSampleCount; i++ )
778
catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
779
}
780
else
781
{
782
if( vi >= numPrecalcVal && vi < var_count )
783
{
784
for( int i = 0; i < nodeSampleCount; i++ )
785
catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
786
}
787
else
788
{
789
get_cv_labels( n, catValuesBuf );
790
}
791
}
792
793
return catValuesBuf;
794
}
795
796
float CvCascadeBoostTrainData::getVarValue( int vi, int si )
797
{
798
if ( vi < numPrecalcVal && !valCache.empty() )
799
return valCache.at<float>( vi, si );
800
return (*featureEvaluator)( vi, si );
801
}
802
803
804
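// Parallel bodies used by precalculate(): FeatureIdxOnlyPrecalc fills only the
// sorted sample-index buffer, FeatureValAndIdxPrecalc fills both the value cache
// and the sorted indices, and FeatureValOnlyPrecalc fills the value cache alone.
// Each worker handles a contiguous range of feature indices.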
struct FeatureIdxOnlyPrecalc : ParallelLoopBody
805
{
806
FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
807
{
808
featureEvaluator = _featureEvaluator;
809
sample_count = _sample_count;
810
udst = (unsigned short*)_buf->data.s;
811
idst = _buf->data.i;
812
is_buf_16u = _is_buf_16u;
813
}
814
void operator()( const Range& range ) const
815
{
816
cv::AutoBuffer<float> valCache(sample_count);
817
float* valCachePtr = valCache.data();
818
for ( int fi = range.start; fi < range.end; fi++)
819
{
820
for( int si = 0; si < sample_count; si++ )
821
{
822
valCachePtr[si] = (*featureEvaluator)( fi, si );
823
if ( is_buf_16u )
824
*(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
825
else
826
*(idst + (size_t)fi*sample_count + si) = si;
827
}
828
if ( is_buf_16u )
829
std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) );
830
else
831
std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) );
832
}
833
}
834
const CvFeatureEvaluator* featureEvaluator;
835
int sample_count;
836
int* idst;
837
unsigned short* udst;
838
bool is_buf_16u;
839
};
840
841
struct FeatureValAndIdxPrecalc : ParallelLoopBody
842
{
843
FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
844
{
845
featureEvaluator = _featureEvaluator;
846
valCache = _valCache;
847
sample_count = _sample_count;
848
udst = (unsigned short*)_buf->data.s;
849
idst = _buf->data.i;
850
is_buf_16u = _is_buf_16u;
851
}
852
void operator()( const Range& range ) const
853
{
854
for ( int fi = range.start; fi < range.end; fi++)
855
{
856
for( int si = 0; si < sample_count; si++ )
857
{
858
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
859
if ( is_buf_16u )
860
*(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
861
else
862
*(idst + (size_t)fi*sample_count + si) = si;
863
}
864
if ( is_buf_16u )
865
std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) );
866
else
867
std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) );
868
}
869
}
870
const CvFeatureEvaluator* featureEvaluator;
871
Mat* valCache;
872
int sample_count;
873
int* idst;
874
unsigned short* udst;
875
bool is_buf_16u;
876
};
877
878
struct FeatureValOnlyPrecalc : ParallelLoopBody
879
{
880
FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
881
{
882
featureEvaluator = _featureEvaluator;
883
valCache = _valCache;
884
sample_count = _sample_count;
885
}
886
void operator()( const Range& range ) const
887
{
888
for ( int fi = range.start; fi < range.end; fi++)
889
for( int si = 0; si < sample_count; si++ )
890
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
891
}
892
const CvFeatureEvaluator* featureEvaluator;
893
Mat* valCache;
894
int sample_count;
895
};
896
897
void CvCascadeBoostTrainData::precalculate()
898
{
899
int minNum = MIN( numPrecalcVal, numPrecalcIdx);
900
901
double proctime = -TIME( 0 );
902
parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
903
FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
904
parallel_for_( Range(0, minNum),
905
FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
906
parallel_for_( Range(minNum, numPrecalcVal),
907
FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
908
cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
909
}
910
911
//-------------------------------- CascadeBoostTree ----------------------------------------
912
913
CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
914
{
915
CvDTreeNode* node = root;
916
if( !node )
917
CV_Error( CV_StsError, "The tree has not been trained yet" );
918
919
if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
920
{
921
while( node->left )
922
{
923
CvDTreeSplit* split = node->split;
924
float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
925
node = val <= split->ord.c ? node->left : node->right;
926
}
927
}
928
else // categorical
929
{
930
while( node->left )
931
{
932
CvDTreeSplit* split = node->split;
933
int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
934
node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
935
}
936
}
937
return node;
938
}
939
940
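// Writes the tree in breadth-first order: every internal node is stored as
// { left child, right child, feature index, threshold or category subset }.
// Child entries > 0 refer to later internal nodes, entries <= 0 are negated
// positions in the separate leafValues list; read() below inverts this layout.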
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
941
{
942
int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
943
int subsetN = (maxCatCount + 31)/32;
944
queue<CvDTreeNode*> internalNodesQueue;
945
int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
946
std::vector<float> leafVals(size);
947
int leafValIdx = 0;
948
int internalNodeIdx = 1;
949
CvDTreeNode* tempNode;
950
951
CV_DbgAssert( root );
952
internalNodesQueue.push( root );
953
954
fs << "{";
955
fs << CC_INTERNAL_NODES << "[:";
956
while (!internalNodesQueue.empty())
957
{
958
tempNode = internalNodesQueue.front();
959
CV_Assert( tempNode->left );
960
if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
961
{
962
leafVals[-leafValIdx] = (float)tempNode->left->value;
963
fs << leafValIdx-- ;
964
}
965
else
966
{
967
internalNodesQueue.push( tempNode->left );
968
fs << internalNodeIdx++;
969
}
970
CV_Assert( tempNode->right );
971
if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
972
{
973
leafVals[-leafValIdx] = (float)tempNode->right->value;
974
fs << leafValIdx--;
975
}
976
else
977
{
978
internalNodesQueue.push( tempNode->right );
979
fs << internalNodeIdx++;
980
}
981
int fidx = tempNode->split->var_idx;
982
fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
983
fs << fidx;
984
if ( !maxCatCount )
985
fs << tempNode->split->ord.c;
986
else
987
for( int i = 0; i < subsetN; i++ )
988
fs << tempNode->split->subset[i];
989
internalNodesQueue.pop();
990
}
991
fs << "]"; // CC_INTERNAL_NODES
992
993
fs << CC_LEAF_VALUES << "[:";
994
for (int ni = 0; ni < -leafValIdx; ni++)
995
fs << leafVals[ni];
996
fs << "]"; // CC_LEAF_VALUES
997
fs << "}";
998
}
999
1000
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
1001
CvDTreeTrainData* _data )
1002
{
1003
int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
1004
int subsetN = (maxCatCount + 31)/32;
1005
int step = 3 + ( maxCatCount>0 ? subsetN : 1 );
1006
1007
queue<CvDTreeNode*> internalNodesQueue;
1008
FileNodeIterator internalNodesIt, leafValuesIt;
1009
CvDTreeNode* prntNode, *cldNode;
1010
1011
clear();
1012
data = _data;
1013
ensemble = _ensemble;
1014
pruned_tree_idx = 0;
1015
1016
// read tree nodes
1017
FileNode rnode = node[CC_INTERNAL_NODES];
1018
internalNodesIt = rnode.end();
1019
leafValuesIt = node[CC_LEAF_VALUES].end();
1020
internalNodesIt--; leafValuesIt--;
1021
for( size_t i = 0; i < rnode.size()/step; i++ )
1022
{
1023
prntNode = data->new_node( 0, 0, 0, 0 );
1024
if ( maxCatCount > 0 )
1025
{
1026
prntNode->split = data->new_split_cat( 0, 0 );
1027
for( int j = subsetN-1; j>=0; j--)
1028
{
1029
*internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
1030
}
1031
}
1032
else
1033
{
1034
float split_value;
1035
*internalNodesIt >> split_value; internalNodesIt--;
1036
prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
1037
}
1038
*internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
1039
int ridx, lidx;
1040
*internalNodesIt >> ridx; internalNodesIt--;
1041
*internalNodesIt >> lidx;internalNodesIt--;
1042
if ( ridx <= 0)
1043
{
1044
prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
1045
*leafValuesIt >> cldNode->value; leafValuesIt--;
1046
cldNode->parent = prntNode;
1047
}
1048
else
1049
{
1050
prntNode->right = internalNodesQueue.front();
1051
prntNode->right->parent = prntNode;
1052
internalNodesQueue.pop();
1053
}
1054
1055
if ( lidx <= 0)
1056
{
1057
prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
1058
*leafValuesIt >> cldNode->value; leafValuesIt--;
1059
cldNode->parent = prntNode;
1060
}
1061
else
1062
{
1063
prntNode->left = internalNodesQueue.front();
1064
prntNode->left->parent = prntNode;
1065
internalNodesQueue.pop();
1066
}
1067
1068
internalNodesQueue.push( prntNode );
1069
}
1070
1071
root = internalNodesQueue.front();
1072
internalNodesQueue.pop();
1073
}
1074
1075
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
1076
{
1077
int n = node->sample_count, nl, nr, scount = data->sample_count;
1078
char* dir = (char*)data->direction->data.ptr;
1079
CvDTreeNode *left = 0, *right = 0;
1080
int* newIdx = data->split_buf->data.i;
1081
int newBufIdx = data->get_child_buf_idx( node );
1082
int workVarCount = data->get_work_var_count();
1083
CvMat* buf = data->buf;
1084
size_t length_buf_row = data->get_length_subbuf();
1085
cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
1086
int* tempBuf = (int*)inn_buf.data();
1087
bool splitInputData;
1088
1089
complete_node_dir(node);
1090
1091
for( int i = nl = nr = 0; i < n; i++ )
1092
{
1093
int d = dir[i];
1094
// initialize new indices for splitting ordered variables
1095
newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
1096
nr += d;
1097
nl += d^1;
1098
}
1099
1100
node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
1101
node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );
1102
1103
splitInputData = node->depth + 1 < data->params.max_depth &&
1104
(node->left->sample_count > data->params.min_sample_count ||
1105
node->right->sample_count > data->params.min_sample_count);
1106
1107
// split ordered variables, keep both halves sorted.
1108
for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
1109
{
1110
int ci = data->get_var_type(vi);
1111
if( ci >= 0 || !splitInputData )
1112
continue;
1113
1114
int n1 = node->get_num_valid(vi);
1115
float *src_val_buf = (float*)(tempBuf + n);
1116
int *src_sorted_idx_buf = (int*)(src_val_buf + n);
1117
int *src_sample_idx_buf = src_sorted_idx_buf + n;
1118
const int* src_sorted_idx = 0;
1119
const float* src_val = 0;
1120
data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
1121
1122
for(int i = 0; i < n; i++)
1123
tempBuf[i] = src_sorted_idx[i];
1124
1125
if (data->is_buf_16u)
1126
{
1127
ushort *ldst, *rdst;
1128
ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
1129
vi*scount + left->offset);
1130
rdst = (ushort*)(ldst + nl);
1131
1132
// split sorted
1133
for( int i = 0; i < n1; i++ )
1134
{
1135
int idx = tempBuf[i];
1136
int d = dir[idx];
1137
idx = newIdx[idx];
1138
if (d)
1139
{
1140
*rdst = (ushort)idx;
1141
rdst++;
1142
}
1143
else
1144
{
1145
*ldst = (ushort)idx;
1146
ldst++;
1147
}
1148
}
1149
CV_Assert( n1 == n );
1150
}
1151
else
1152
{
1153
int *ldst, *rdst;
1154
ldst = buf->data.i + left->buf_idx*length_buf_row +
1155
vi*scount + left->offset;
1156
rdst = buf->data.i + right->buf_idx*length_buf_row +
1157
vi*scount + right->offset;
1158
1159
// split sorted
1160
for( int i = 0; i < n1; i++ )
1161
{
1162
int idx = tempBuf[i];
1163
int d = dir[idx];
1164
idx = newIdx[idx];
1165
if (d)
1166
{
1167
*rdst = idx;
1168
rdst++;
1169
}
1170
else
1171
{
1172
*ldst = idx;
1173
ldst++;
1174
}
1175
}
1176
CV_Assert( n1 == n );
1177
}
1178
}
1179
1180
// split cv_labels using newIdx relocation table
1181
int *src_lbls_buf = tempBuf + n;
1182
const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
1183
1184
for(int i = 0; i < n; i++)
1185
tempBuf[i] = src_lbls[i];
1186
1187
if (data->is_buf_16u)
1188
{
1189
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
1190
(size_t)(workVarCount-1)*scount + left->offset);
1191
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
1192
(size_t)(workVarCount-1)*scount + right->offset);
1193
1194
for( int i = 0; i < n; i++ )
1195
{
1196
int idx = tempBuf[i];
1197
if (dir[i])
1198
{
1199
*rdst = (unsigned short)idx;
1200
rdst++;
1201
}
1202
else
1203
{
1204
*ldst = (unsigned short)idx;
1205
ldst++;
1206
}
1207
}
1208
1209
}
1210
else
1211
{
1212
int *ldst = buf->data.i + left->buf_idx*length_buf_row +
1213
(size_t)(workVarCount-1)*scount + left->offset;
1214
int *rdst = buf->data.i + right->buf_idx*length_buf_row +
1215
(size_t)(workVarCount-1)*scount + right->offset;
1216
1217
for( int i = 0; i < n; i++ )
1218
{
1219
int idx = tempBuf[i];
1220
if (dir[i])
1221
{
1222
*rdst = idx;
1223
rdst++;
1224
}
1225
else
1226
{
1227
*ldst = idx;
1228
ldst++;
1229
}
1230
}
1231
}
1232
1233
// split sample indices
1234
int *sampleIdx_src_buf = tempBuf + n;
1235
const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
1236
1237
for(int i = 0; i < n; i++)
1238
tempBuf[i] = sampleIdx_src[i];
1239
1240
if (data->is_buf_16u)
1241
{
1242
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
1243
(size_t)workVarCount*scount + left->offset);
1244
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
1245
(size_t)workVarCount*scount + right->offset);
1246
for (int i = 0; i < n; i++)
1247
{
1248
unsigned short idx = (unsigned short)tempBuf[i];
1249
if (dir[i])
1250
{
1251
*rdst = idx;
1252
rdst++;
1253
}
1254
else
1255
{
1256
*ldst = idx;
1257
ldst++;
1258
}
1259
}
1260
}
1261
else
1262
{
1263
int* ldst = buf->data.i + left->buf_idx*length_buf_row +
1264
(size_t)workVarCount*scount + left->offset;
1265
int* rdst = buf->data.i + right->buf_idx*length_buf_row +
1266
(size_t)workVarCount*scount + right->offset;
1267
for (int i = 0; i < n; i++)
1268
{
1269
int idx = tempBuf[i];
1270
if (dir[i])
1271
{
1272
*rdst = idx;
1273
rdst++;
1274
}
1275
else
1276
{
1277
*ldst = idx;
1278
ldst++;
1279
}
1280
}
1281
}
1282
1283
for( int vi = 0; vi < data->var_count; vi++ )
1284
{
1285
left->set_num_valid(vi, (int)(nl));
1286
right->set_num_valid(vi, (int)(nr));
1287
}
1288
1289
// deallocate the parent node data that is not needed anymore
1290
data->free_node_data(node);
1291
}
1292
1293
static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
1294
{
1295
if ( node && node->split )
1296
{
1297
featureMap.ptr<int>(0)[node->split->var_idx] = 1;
1298
auxMarkFeaturesInMap( node->left, featureMap );
1299
auxMarkFeaturesInMap( node->right, featureMap );
1300
}
1301
}
1302
1303
void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
1304
{
1305
auxMarkFeaturesInMap( root, featureMap );
1306
}
1307
1308
//----------------------------------- CascadeBoost --------------------------------------
1309
1310
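// Trains a single cascade stage: weak trees are grown one at a time on the
// boosted sample weights, with update_weights()/trim_weights() after each tree,
// until isErrDesired() reports the stage meets maxFalseAlarm (at >= minHitRate)
// or params.weak_count trees have been added.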
bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
1311
int _numSamples,
1312
int _precalcValBufSize, int _precalcIdxBufSize,
1313
const CvCascadeBoostParams& _params )
1314
{
1315
bool isTrained = false;
1316
CV_Assert( !data );
1317
clear();
1318
data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
1319
_precalcValBufSize, _precalcIdxBufSize, _params );
1320
CvMemStorage *storage = cvCreateMemStorage();
1321
weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1322
storage = 0;
1323
1324
set_params( _params );
1325
if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
1326
data->do_responses_copy();
1327
1328
update_weights( 0 );
1329
1330
cout << "+----+---------+---------+" << endl;
1331
cout << "| N | HR | FA |" << endl;
1332
cout << "+----+---------+---------+" << endl;
1333
1334
do
1335
{
1336
CvCascadeBoostTree* tree = new CvCascadeBoostTree;
1337
if( !tree->train( data, subsample_mask, this ) )
1338
{
1339
delete tree;
1340
break;
1341
}
1342
cvSeqPush( weak, &tree );
1343
update_weights( tree );
1344
trim_weights();
1345
if( cvCountNonZero(subsample_mask) == 0 )
1346
break;
1347
}
1348
while( !isErrDesired() && (weak->total < params.weak_count) );
1349
1350
if(weak->total > 0)
1351
{
1352
data->is_classifier = true;
1353
data->free_train_data();
1354
isTrained = true;
1355
}
1356
else
1357
clear();
1358
1359
return isTrained;
1360
}
1361
1362
float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
1363
{
1364
CV_Assert( weak );
1365
double sum = 0;
1366
CvSeqReader reader;
1367
cvStartReadSeq( weak, &reader );
1368
cvSetSeqReaderPos( &reader, 0 );
1369
for( int i = 0; i < weak->total; i++ )
1370
{
1371
CvBoostTree* wtree;
1372
CV_READ_SEQ_ELEM( wtree, reader );
1373
sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
1374
}
1375
if( !returnSum )
1376
sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
1377
return (float)sum;
1378
}
1379
1380
bool CvCascadeBoost::set_params( const CvBoostParams& _params )
1381
{
1382
minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
1383
maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
1384
return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
1385
( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
1386
CvBoost::set_params( _params ));
1387
}
1388
1389
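// With tree == 0 this initializes the boosting state (responses in {-1,1},
// uniform weights, all-active sample mask); otherwise it applies the Discrete,
// Real, Logit or Gentle AdaBoost weight update described in the inline comments
// below and renormalizes the weights to sum to 1.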
void CvCascadeBoost::update_weights( CvBoostTree* tree )
1390
{
1391
int n = data->sample_count;
1392
double sumW = 0.;
1393
int step = 0;
1394
float* fdata = 0;
1395
int *sampleIdxBuf;
1396
const int* sampleIdx = 0;
1397
int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
1398
( !tree ? n*sizeof(int) : 0 );
1399
cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
1400
uchar* cur_inn_buf_pos = inn_buf.data();
1401
if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
1402
{
1403
step = CV_IS_MAT_CONT(data->responses_copy->type) ?
1404
1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
1405
fdata = data->responses_copy->data.fl;
1406
sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
1407
sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
1408
}
1409
CvMat* buf = data->buf;
1410
size_t length_buf_row = data->get_length_subbuf();
1411
if( !tree ) // before training the first tree, initialize weights and other parameters
1412
{
1413
int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
1414
const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
1415
// in case of logitboost and gentle adaboost each weak tree is a regression tree,
1416
// so we need to convert class labels to floating-point values
1417
double w0 = 1./n;
1418
double p[2] = { 1, 1 };
1419
1420
cvReleaseMat( &orig_response );
1421
cvReleaseMat( &sum_response );
1422
cvReleaseMat( &weak_eval );
1423
cvReleaseMat( &subsample_mask );
1424
cvReleaseMat( &weights );
1425
1426
orig_response = cvCreateMat( 1, n, CV_32S );
1427
weak_eval = cvCreateMat( 1, n, CV_64F );
1428
subsample_mask = cvCreateMat( 1, n, CV_8U );
1429
weights = cvCreateMat( 1, n, CV_64F );
1430
subtree_weights = cvCreateMat( 1, n + 2, CV_64F );
1431
1432
if (data->is_buf_16u)
1433
{
1434
unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
1435
data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
1436
for( int i = 0; i < n; i++ )
1437
{
1438
// save original categorical responses {0,1}, convert them to {-1,1}
1439
orig_response->data.i[i] = classLabels[i]*2 - 1;
1440
// make all the samples active at start.
1441
// later, in trim_weights(), some of them are deactivated/reactivated if needed
1442
subsample_mask->data.ptr[i] = (uchar)1;
1443
// make all the initial weights the same.
1444
weights->data.db[i] = w0*p[classLabels[i]];
1445
// set the labels to find (from within weak tree learning proc)
1446
// the particular sample weight, and where to store the response.
1447
labels[i] = (unsigned short)i;
1448
}
1449
}
1450
else
1451
{
1452
int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
1453
data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;
1454
1455
for( int i = 0; i < n; i++ )
1456
{
1457
// save original categorical responses {0,1}, convert them to {-1,1}
1458
orig_response->data.i[i] = classLabels[i]*2 - 1;
1459
subsample_mask->data.ptr[i] = (uchar)1;
1460
weights->data.db[i] = w0*p[classLabels[i]];
1461
labels[i] = i;
1462
}
1463
}
1464
1465
if( params.boost_type == LOGIT )
1466
{
1467
sum_response = cvCreateMat( 1, n, CV_64F );
1468
1469
for( int i = 0; i < n; i++ )
1470
{
1471
sum_response->data.db[i] = 0;
1472
fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
1473
}
1474
1475
// in case of logitboost each weak tree is a regression tree.
1476
// the target function values are recalculated for each of the trees
1477
data->is_classifier = false;
1478
}
1479
else if( params.boost_type == GENTLE )
1480
{
1481
for( int i = 0; i < n; i++ )
1482
fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];
1483
1484
data->is_classifier = false;
1485
}
1486
}
1487
else
1488
{
1489
// at this moment, for all the samples that participated in the training of the most
1490
// recent weak classifier we know the responses. For other samples we need to compute them
1491
if( have_subsample )
1492
{
1493
// invert the subsample mask
1494
cvXorS( subsample_mask, cvScalar(1.), subsample_mask );
1495
1496
// run tree through all the non-processed samples
1497
for( int i = 0; i < n; i++ )
1498
if( subsample_mask->data.ptr[i] )
1499
{
1500
weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
1501
}
1502
}
1503
1504
// now update weights and other parameters for each type of boosting
1505
if( params.boost_type == DISCRETE )
1506
{
1507
// Discrete AdaBoost:
1508
// weak_eval[i] (=f(x_i)) is in {-1,1}
1509
// err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
1510
// C = log((1-err)/err)
1511
// w_i *= exp(C*(f(x_i) != y_i))
1512
1513
double C, err = 0.;
1514
double scale[] = { 1., 0. };
1515
1516
for( int i = 0; i < n; i++ )
1517
{
1518
double w = weights->data.db[i];
1519
sumW += w;
1520
err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
1521
}
1522
1523
if( sumW != 0 )
1524
err /= sumW;
1525
C = err = -logRatio( err );
1526
scale[1] = exp(err);
1527
1528
sumW = 0;
1529
for( int i = 0; i < n; i++ )
1530
{
1531
double w = weights->data.db[i]*
1532
scale[weak_eval->data.db[i] != orig_response->data.i[i]];
1533
sumW += w;
1534
weights->data.db[i] = w;
1535
}
1536
1537
tree->scale( C );
1538
}
1539
else if( params.boost_type == REAL )
1540
{
1541
// Real AdaBoost:
1542
// weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
1543
// w_i *= exp(-y_i*f(x_i))
1544
1545
for( int i = 0; i < n; i++ )
1546
weak_eval->data.db[i] *= -orig_response->data.i[i];
1547
1548
cvExp( weak_eval, weak_eval );
1549
1550
for( int i = 0; i < n; i++ )
1551
{
1552
double w = weights->data.db[i]*weak_eval->data.db[i];
1553
sumW += w;
1554
weights->data.db[i] = w;
1555
}
1556
}
1557
else if( params.boost_type == LOGIT )
1558
{
1559
// LogitBoost:
1560
// weak_eval[i] = f(x_i) in [-z_max,z_max]
1561
// sum_response = F(x_i).
1562
// F(x_i) += 0.5*f(x_i)
1563
// p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
1564
// reuse weak_eval: weak_eval[i] <- p(x_i)
1565
// w_i = p(x_i)*(1 - p(x_i))
1566
// z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
1567
// store z_i to the data->data_root as the new target responses
1568
1569
const double lbWeightThresh = FLT_EPSILON;
1570
const double lbZMax = 10.;
1571
1572
for( int i = 0; i < n; i++ )
1573
{
1574
double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
1575
sum_response->data.db[i] = s;
1576
weak_eval->data.db[i] = -2*s;
1577
}
1578
1579
cvExp( weak_eval, weak_eval );
1580
1581
for( int i = 0; i < n; i++ )
1582
{
1583
double p = 1./(1. + weak_eval->data.db[i]);
1584
double w = p*(1 - p), z;
1585
w = MAX( w, lbWeightThresh );
1586
weights->data.db[i] = w;
1587
sumW += w;
1588
if( orig_response->data.i[i] > 0 )
1589
{
1590
z = 1./p;
1591
fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
1592
}
1593
else
1594
{
1595
z = 1./(1-p);
1596
fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
1597
}
1598
}
1599
}
1600
else
1601
{
1602
// Gentle AdaBoost:
1603
// weak_eval[i] = f(x_i) in [-1,1]
1604
// w_i *= exp(-y_i*f(x_i))
1605
assert( params.boost_type == GENTLE );
1606
1607
for( int i = 0; i < n; i++ )
1608
weak_eval->data.db[i] *= -orig_response->data.i[i];
1609
1610
cvExp( weak_eval, weak_eval );
1611
1612
for( int i = 0; i < n; i++ )
1613
{
1614
double w = weights->data.db[i] * weak_eval->data.db[i];
1615
weights->data.db[i] = w;
1616
sumW += w;
1617
}
1618
}
1619
}
1620
1621
// renormalize weights
1622
if( sumW > FLT_EPSILON )
1623
{
1624
sumW = 1./sumW;
1625
for( int i = 0; i < n; ++i )
1626
weights->data.db[i] *= sumW;
1627
}
1628
}
1629
1630
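// Sorts the boosted sums of the positive samples to pick the stage threshold
// that keeps at least minHitRate of them, then returns whether the resulting
// false-alarm rate on the negative samples is already <= maxFalseAlarm.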
bool CvCascadeBoost::isErrDesired()
1631
{
1632
int sCount = data->sample_count,
1633
numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
1634
vector<float> eval(sCount);
1635
1636
for( int i = 0; i < sCount; i++ )
1637
if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
1638
eval[numPos++] = predict( i, true );
1639
1640
std::sort(&eval[0], &eval[0] + numPos);
1641
1642
int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
1643
1644
threshold = eval[ thresholdIdx ];
1645
numPosTrue = numPos - thresholdIdx;
1646
for( int i = thresholdIdx - 1; i >= 0; i--)
1647
if ( abs( eval[i] - threshold) < FLT_EPSILON )
1648
numPosTrue++;
1649
float hitRate = ((float) numPosTrue) / ((float) numPos);
1650
1651
for( int i = 0; i < sCount; i++ )
1652
{
1653
if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
1654
{
1655
numNeg++;
1656
if( predict( i ) )
1657
numFalse++;
1658
}
1659
}
1660
float falseAlarm = ((float) numFalse) / ((float) numNeg);
1661
1662
cout << "|"; cout.width(4); cout << right << weak->total;
1663
cout << "|"; cout.width(9); cout << right << hitRate;
1664
cout << "|"; cout.width(9); cout << right << falseAlarm;
1665
cout << "|" << endl;
1666
cout << "+----+---------+---------+" << endl;
1667
1668
return falseAlarm <= maxFalseAlarm;
1669
}
1670
1671
void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
1672
{
1673
// char cmnt[30];
1674
CvCascadeBoostTree* weakTree;
1675
fs << CC_WEAK_COUNT << weak->total;
1676
fs << CC_STAGE_THRESHOLD << threshold;
1677
fs << CC_WEAK_CLASSIFIERS << "[";
1678
for( int wi = 0; wi < weak->total; wi++)
1679
{
1680
/*sprintf( cmnt, "tree %i", wi );
1681
cvWriteComment( fs, cmnt, 0 );*/
1682
weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1683
weakTree->write( fs, featureMap );
1684
}
1685
fs << "]";
1686
}
1687
1688
bool CvCascadeBoost::read( const FileNode &node,
1689
const CvFeatureEvaluator* _featureEvaluator,
1690
const CvCascadeBoostParams& _params )
1691
{
1692
CvMemStorage* storage;
1693
clear();
1694
data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
1695
set_params( _params );
1696
1697
node[CC_STAGE_THRESHOLD] >> threshold;
1698
FileNode rnode = node[CC_WEAK_CLASSIFIERS];
1699
1700
storage = cvCreateMemStorage();
1701
weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1702
for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
1703
{
1704
CvCascadeBoostTree* tree = new CvCascadeBoostTree();
1705
tree->read( *it, this, data );
1706
cvSeqPush( weak, &tree );
1707
}
1708
return true;
1709
}
1710
1711
void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
1712
{
1713
for( int wi = 0; wi < weak->total; wi++ )
1714
{
1715
CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1716
weakTree->markFeaturesInMap( featureMap );
1717
}
1718
}
1719
1720