// GitHub Repository: Tetragramm/opencv
// Path: blob/master/apps/traincascade/boost.cpp
1
#include "opencv2/core.hpp"
2
#include "opencv2/core/utility.hpp"
3
4
using cv::Size;
5
using cv::Mat;
6
using cv::Point;
7
using cv::FileStorage;
8
using cv::Rect;
9
using cv::Ptr;
10
using cv::FileNode;
11
using cv::Mat_;
12
using cv::Range;
13
using cv::FileNodeIterator;
14
using cv::ParallelLoopBody;
15
30
#include "boost.h"
31
#include "cascadeclassifier.h"
32
#include <queue>
33
34
#include "cvconfig.h"
35
36
using namespace std;
37
38
static inline double
39
logRatio( double val )
40
{
41
const double eps = 1e-5;
42
43
val = max( val, eps );
44
val = min( val, 1. - eps );
45
return log( val/(1. - val) );
46
}
47
48
template<typename T, typename Idx>
49
class LessThanIdx
50
{
51
public:
52
LessThanIdx( const T* _arr ) : arr(_arr) {}
53
bool operator()(Idx a, Idx b) const { return arr[a] < arr[b]; }
54
const T* arr;
55
};
56
57
static inline int cvAlign( int size, int align )
58
{
59
CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
60
return (size + align - 1) & -align;
61
}
62
63
#define CV_THRESHOLD_EPS (0.00001F)
64
65
static const int MinBlockSize = 1 << 16;
66
static const int BlockSizeDelta = 1 << 10;
67
68
// TODO remove this code duplication with ml/precomp.hpp
69
70
static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
71
{
72
return *(const int*)a - *(const int*)b;
73
}
74
75
static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
76
{
77
CvMat* idx = 0;
78
79
CV_FUNCNAME( "cvPreprocessIndexArray" );
80
81
__CV_BEGIN__;
82
83
int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
84
uchar* srcb = 0;
85
int* srci = 0;
86
int* dsti;
87
88
if( !CV_IS_MAT(idx_arr) )
89
CV_ERROR( CV_StsBadArg, "Invalid index array" );
90
91
if( idx_arr->rows != 1 && idx_arr->cols != 1 )
92
CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
93
94
idx_total = idx_arr->rows + idx_arr->cols - 1;
95
srcb = idx_arr->data.ptr;
96
srci = idx_arr->data.i;
97
98
type = CV_MAT_TYPE(idx_arr->type);
99
step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
100
101
switch( type )
102
{
103
case CV_8UC1:
104
case CV_8SC1:
105
// idx_arr is array of 1's and 0's -
106
// i.e. it is a mask of the selected components
107
if( idx_total != data_arr_size )
108
CV_ERROR( CV_StsUnmatchedSizes,
109
"Component mask should contain as many elements as the total number of input variables" );
110
111
for( i = 0; i < idx_total; i++ )
112
idx_selected += srcb[i*step] != 0;
113
114
if( idx_selected == 0 )
115
CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
116
117
break;
118
case CV_32SC1:
119
// idx_arr is array of integer indices of selected components
120
if( idx_total > data_arr_size )
121
CV_ERROR( CV_StsOutOfRange,
122
"index array may not contain more elements than the total number of input variables" );
123
idx_selected = idx_total;
124
// check if sorted already
125
for( i = 0; i < idx_total; i++ )
126
{
127
int val = srci[i*step];
128
if( val >= prev )
129
{
130
is_sorted = 0;
131
break;
132
}
133
prev = val;
134
}
135
break;
136
default:
137
CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
138
"(it should be 8uC1, 8sC1 or 32sC1)" );
139
}
140
141
CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
142
dsti = idx->data.i;
143
144
if( type < CV_32SC1 )
145
{
146
for( i = 0; i < idx_total; i++ )
147
if( srcb[i*step] )
148
*dsti++ = i;
149
}
150
else
151
{
152
for( i = 0; i < idx_total; i++ )
153
dsti[i] = srci[i*step];
154
155
if( !is_sorted )
156
qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
157
158
if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
159
CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
160
161
if( check_for_duplicates )
162
{
163
for( i = 1; i < idx_total; i++ )
164
if( dsti[i] <= dsti[i-1] )
165
CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
166
}
167
}
168
169
__CV_END__;
170
171
if( cvGetErrStatus() < 0 )
172
cvReleaseMat( &idx );
173
174
return idx;
175
}
176
177
//----------------------------- CascadeBoostParams -------------------------------------------------
178
179
CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
180
{
181
boost_type = CvBoost::GENTLE;
182
use_surrogates = use_1se_rule = truncate_pruned_tree = false;
183
}
184
185
CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
186
float _minHitRate, float _maxFalseAlarm,
187
double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
188
CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
189
{
190
boost_type = CvBoost::GENTLE;
191
minHitRate = _minHitRate;
192
maxFalseAlarm = _maxFalseAlarm;
193
use_surrogates = use_1se_rule = truncate_pruned_tree = false;
194
}
195
196
void CvCascadeBoostParams::write( FileStorage &fs ) const
197
{
198
string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
199
boost_type == CvBoost::REAL ? CC_REAL_BOOST :
200
boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
201
boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
202
CV_Assert( !boostTypeStr.empty() );
203
fs << CC_BOOST_TYPE << boostTypeStr;
204
fs << CC_MINHITRATE << minHitRate;
205
fs << CC_MAXFALSEALARM << maxFalseAlarm;
206
fs << CC_TRIM_RATE << weight_trim_rate;
207
fs << CC_MAX_DEPTH << max_depth;
208
fs << CC_WEAK_COUNT << weak_count;
209
}
210
211
bool CvCascadeBoostParams::read( const FileNode &node )
212
{
213
string boostTypeStr;
214
FileNode rnode = node[CC_BOOST_TYPE];
215
rnode >> boostTypeStr;
216
boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
217
!boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
218
!boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
219
!boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
220
if (boost_type == -1)
221
CV_Error( CV_StsBadArg, "unsupported Boost type" );
222
node[CC_MINHITRATE] >> minHitRate;
223
node[CC_MAXFALSEALARM] >> maxFalseAlarm;
224
node[CC_TRIM_RATE] >> weight_trim_rate ;
225
node[CC_MAX_DEPTH] >> max_depth ;
226
node[CC_WEAK_COUNT] >> weak_count ;
227
if ( minHitRate <= 0 || minHitRate > 1 ||
228
maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
229
weight_trim_rate <= 0 || weight_trim_rate > 1 ||
230
max_depth <= 0 || weak_count <= 0 )
231
CV_Error( CV_StsBadArg, "bad parameters range");
232
return true;
233
}
234
235
void CvCascadeBoostParams::printDefaults() const
236
{
237
cout << "--boostParams--" << endl;
238
cout << " [-bt <{" << CC_DISCRETE_BOOST << ", "
239
<< CC_REAL_BOOST << ", "
240
<< CC_LOGIT_BOOST ", "
241
<< CC_GENTLE_BOOST << "(default)}>]" << endl;
242
cout << " [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
243
cout << " [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
244
cout << " [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
245
cout << " [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
246
cout << " [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
247
}
248
249
void CvCascadeBoostParams::printAttrs() const
250
{
251
string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
252
boost_type == CvBoost::REAL ? CC_REAL_BOOST :
253
boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
254
boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
255
CV_Assert( !boostTypeStr.empty() );
256
cout << "boostType: " << boostTypeStr << endl;
257
cout << "minHitRate: " << minHitRate << endl;
258
cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
259
cout << "weightTrimRate: " << weight_trim_rate << endl;
260
cout << "maxDepth: " << max_depth << endl;
261
cout << "maxWeakCount: " << weak_count << endl;
262
}
263
264
bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
265
{
266
bool res = true;
267
268
if( !prmName.compare( "-bt" ) )
269
{
270
boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
271
!val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
272
!val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
273
!val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
274
if (boost_type == -1)
275
res = false;
276
}
277
else if( !prmName.compare( "-minHitRate" ) )
278
{
279
minHitRate = (float) atof( val.c_str() );
280
}
281
else if( !prmName.compare( "-maxFalseAlarmRate" ) )
282
{
283
maxFalseAlarm = (float) atof( val.c_str() );
284
}
285
else if( !prmName.compare( "-weightTrimRate" ) )
286
{
287
weight_trim_rate = (float) atof( val.c_str() );
288
}
289
else if( !prmName.compare( "-maxDepth" ) )
290
{
291
max_depth = atoi( val.c_str() );
292
}
293
else if( !prmName.compare( "-maxWeakCount" ) )
294
{
295
weak_count = atoi( val.c_str() );
296
}
297
else
298
res = false;
299
300
return res;
301
}
302
303
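// Builds the root CvDTreeNode for the sample subset selected by _subsample_idx.
// When _subsample_idx is absent or simply enumerates every sample in order, the
// existing data_root is copied; otherwise the precalculated sorted-index buffers,
// cv_labels and sample indices in buf are repacked for the selected subset.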
CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
304
{
305
CvDTreeNode* root = 0;
306
CvMat* isubsample_idx = 0;
307
CvMat* subsample_co = 0;
308
309
bool isMakeRootCopy = true;
310
311
if( !data_root )
312
CV_Error( CV_StsError, "No training data has been set" );
313
314
if( _subsample_idx )
315
{
316
CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );
317
318
if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
319
{
320
const int* sidx = isubsample_idx->data.i;
321
for( int i = 0; i < sample_count; i++ )
322
{
323
if( sidx[i] != i )
324
{
325
isMakeRootCopy = false;
326
break;
327
}
328
}
329
}
330
else
331
isMakeRootCopy = false;
332
}
333
334
if( isMakeRootCopy )
335
{
336
// make a copy of the root node
337
CvDTreeNode temp;
338
int i;
339
root = new_node( 0, 1, 0, 0 );
340
temp = *root;
341
*root = *data_root;
342
root->num_valid = temp.num_valid;
343
if( root->num_valid )
344
{
345
for( i = 0; i < var_count; i++ )
346
root->num_valid[i] = data_root->num_valid[i];
347
}
348
root->cv_Tn = temp.cv_Tn;
349
root->cv_node_risk = temp.cv_node_risk;
350
root->cv_node_error = temp.cv_node_error;
351
}
352
else
353
{
354
int* sidx = isubsample_idx->data.i;
355
// co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
356
int* co, cur_ofs = 0;
357
int workVarCount = get_work_var_count();
358
int count = isubsample_idx->rows + isubsample_idx->cols - 1;
359
360
root = new_node( 0, count, 1, 0 );
361
362
CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
363
cvZero( subsample_co );
364
co = subsample_co->data.i;
365
for( int i = 0; i < count; i++ )
366
co[sidx[i]*2]++;
367
for( int i = 0; i < sample_count; i++ )
368
{
369
if( co[i*2] )
370
{
371
co[i*2+1] = cur_ofs;
372
cur_ofs += co[i*2];
373
}
374
else
375
co[i*2+1] = -1;
376
}
377
378
cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
379
// subsample ordered variables
380
for( int vi = 0; vi < numPrecalcIdx; vi++ )
381
{
382
int ci = get_var_type(vi);
383
CV_Assert( ci < 0 );
384
385
int *src_idx_buf = (int*)inn_buf.data();
386
float *src_val_buf = (float*)(src_idx_buf + sample_count);
387
int* sample_indices_buf = (int*)(src_val_buf + sample_count);
388
const int* src_idx = 0;
389
const float* src_val = 0;
390
get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );
391
392
int j = 0, idx, count_i;
393
int num_valid = data_root->get_num_valid(vi);
394
CV_Assert( num_valid == sample_count );
395
396
if (is_buf_16u)
397
{
398
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
399
(size_t)vi*sample_count + data_root->offset);
400
for( int i = 0; i < num_valid; i++ )
401
{
402
idx = src_idx[i];
403
count_i = co[idx*2];
404
if( count_i )
405
for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
406
udst_idx[j] = (unsigned short)cur_ofs;
407
}
408
}
409
else
410
{
411
int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
412
(size_t)vi*sample_count + root->offset;
413
for( int i = 0; i < num_valid; i++ )
414
{
415
idx = src_idx[i];
416
count_i = co[idx*2];
417
if( count_i )
418
for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
419
idst_idx[j] = cur_ofs;
420
}
421
}
422
}
423
424
// subsample cv_labels
425
const int* src_lbls = get_cv_labels(data_root, (int*)inn_buf.data());
426
if (is_buf_16u)
427
{
428
unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
429
(size_t)(workVarCount-1)*sample_count + root->offset);
430
for( int i = 0; i < count; i++ )
431
udst[i] = (unsigned short)src_lbls[sidx[i]];
432
}
433
else
434
{
435
int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
436
(size_t)(workVarCount-1)*sample_count + root->offset;
437
for( int i = 0; i < count; i++ )
438
idst[i] = src_lbls[sidx[i]];
439
}
440
441
// subsample sample_indices
442
const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
443
if (is_buf_16u)
444
{
445
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
446
(size_t)workVarCount*sample_count + root->offset);
447
for( int i = 0; i < count; i++ )
448
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
449
}
450
else
451
{
452
int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
453
(size_t)workVarCount*sample_count + root->offset;
454
for( int i = 0; i < count; i++ )
455
sample_idx_dst[i] = sample_idx_src[sidx[i]];
456
}
457
458
for( int vi = 0; vi < var_count; vi++ )
459
root->set_num_valid(vi, count);
460
}
461
462
cvReleaseMat( &isubsample_idx );
463
cvReleaseMat( &subsample_co );
464
465
return root;
466
}
467
468
//---------------------------- CascadeBoostTrainData -----------------------------
469
470
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
471
const CvDTreeParams& _params )
472
{
473
is_classifier = true;
474
var_all = var_count = (int)_featureEvaluator->getNumFeatures();
475
476
featureEvaluator = _featureEvaluator;
477
shared = true;
478
set_params( _params );
479
max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
480
var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
481
if ( featureEvaluator->getMaxCatCount() > 0 )
482
{
483
numPrecalcIdx = 0;
484
cat_var_count = var_count;
485
ord_var_count = 0;
486
for( int vi = 0; vi < var_count; vi++ )
487
{
488
var_type->data.i[vi] = vi;
489
}
490
}
491
else
492
{
493
cat_var_count = 0;
494
ord_var_count = var_count;
495
for( int vi = 1; vi <= var_count; vi++ )
496
{
497
var_type->data.i[vi-1] = -vi;
498
}
499
}
500
var_type->data.i[var_count] = cat_var_count;
501
var_type->data.i[var_count+1] = cat_var_count+1;
502
503
int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
504
int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
505
treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
506
tree_storage = cvCreateMemStorage( treeBlockSize );
507
node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
508
split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
509
}
510
511
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
512
int _numSamples,
513
int _precalcValBufSize, int _precalcIdxBufSize,
514
const CvDTreeParams& _params )
515
{
516
setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
517
}
518
519
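// Allocates the per-stage training buffers: chooses 16-bit index storage when
// sample_count < 65536, converts the precalc buffer sizes (given in MB) into the
// number of features whose values / sorted indices are cached, and creates the
// CvDTreeTrainData storages (buf, valCache, cat_count, tree/node/split heaps).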
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
520
int _numSamples,
521
int _precalcValBufSize, int _precalcIdxBufSize,
522
const CvDTreeParams& _params )
523
{
524
int* idst = 0;
525
unsigned short* udst = 0;
526
527
uint64 effective_buf_size = 0;
528
int effective_buf_height = 0, effective_buf_width = 0;
529
530
531
clear();
532
shared = true;
533
have_labels = true;
534
have_priors = false;
535
is_classifier = true;
536
537
rng = &cv::theRNG();
538
539
set_params( _params );
540
541
CV_Assert( _featureEvaluator );
542
featureEvaluator = _featureEvaluator;
543
544
max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
545
_resp = cvMat(featureEvaluator->getCls());
546
responses = &_resp;
547
// TODO: check responses: elements must be 0 or 1
548
549
if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
550
CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );
551
552
var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
553
sample_count = _numSamples;
554
555
is_buf_16u = false;
556
if (sample_count < 65536)
557
is_buf_16u = true;
558
559
numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
560
numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
561
((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );
562
563
assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );
564
565
valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
566
var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
567
568
if ( featureEvaluator->getMaxCatCount() > 0 )
569
{
570
numPrecalcIdx = 0;
571
cat_var_count = var_count;
572
ord_var_count = 0;
573
for( int vi = 0; vi < var_count; vi++ )
574
{
575
var_type->data.i[vi] = vi;
576
}
577
}
578
else
579
{
580
cat_var_count = 0;
581
ord_var_count = var_count;
582
for( int vi = 1; vi <= var_count; vi++ )
583
{
584
var_type->data.i[vi-1] = -vi;
585
}
586
}
587
var_type->data.i[var_count] = cat_var_count;
588
var_type->data.i[var_count+1] = cat_var_count+1;
589
work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_labels*/;
590
buf_count = 2;
591
592
buf_size = -1; // the member buf_size is obsolete
593
594
effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
595
effective_buf_width = sample_count;
596
effective_buf_height = work_var_count+1;
597
598
if (effective_buf_width >= effective_buf_height)
599
effective_buf_height *= buf_count;
600
else
601
effective_buf_width *= buf_count;
602
603
if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
604
{
605
CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
606
}
607
608
if ( is_buf_16u )
609
buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
610
else
611
buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );
612
613
cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
614
615
// precalculate valCache and set indices in buf
616
precalculate();
617
618
// now calculate the maximum size of split,
619
// create memory storage that will keep nodes and splits of the decision tree
620
// allocate root node and the buffer for the whole training data
621
int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
622
(MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
623
int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
624
treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
625
tree_storage = cvCreateMemStorage( treeBlockSize );
626
node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );
627
628
int nvSize = var_count*sizeof(int);
629
nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
630
int tempBlockSize = nvSize;
631
tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
632
temp_storage = cvCreateMemStorage( tempBlockSize );
633
nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );
634
635
data_root = new_node( 0, sample_count, 0, 0 );
636
637
// set sample labels
638
if (is_buf_16u)
639
udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count);
640
else
641
idst = buf->data.i + (size_t)work_var_count*sample_count;
642
643
for (int si = 0; si < sample_count; si++)
644
{
645
if (udst)
646
udst[si] = (unsigned short)si;
647
else
648
idst[si] = si;
649
}
650
for( int vi = 0; vi < var_count; vi++ )
651
data_root->set_num_valid(vi, sample_count);
652
for( int vi = 0; vi < cat_var_count; vi++ )
653
cat_count->data.i[vi] = max_c_count;
654
655
cat_count->data.i[cat_var_count] = 2;
656
657
maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
658
(MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
659
split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );
660
661
priors = cvCreateMat( 1, get_num_classes(), CV_64F );
662
cvSet(priors, cvScalar(1));
663
priors_mult = cvCloneMat( priors );
664
counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
665
direction = cvCreateMat( 1, sample_count, CV_8UC1 );
666
split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
667
}
668
669
void CvCascadeBoostTrainData::free_train_data()
670
{
671
CvDTreeTrainData::free_train_data();
672
valCache.release();
673
}
674
675
const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
676
{
677
int nodeSampleCount = n->sample_count;
678
int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
679
680
int* sampleIndicesBuf = labelsBuf; //
681
const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
682
for( int si = 0; si < nodeSampleCount; si++ )
683
{
684
int sidx = sampleIndices[si];
685
labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
686
}
687
return labelsBuf;
688
}
689
690
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
691
{
692
return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
693
}
694
695
const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
696
{
697
return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
698
}
699
700
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
701
const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
702
{
703
int nodeSampleCount = n->sample_count;
704
const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
705
706
if ( vi < numPrecalcIdx )
707
{
708
if( !is_buf_16u )
709
*sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset;
710
else
711
{
712
const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
713
(size_t)vi*sample_count + n->offset );
714
for( int i = 0; i < nodeSampleCount; i++ )
715
sortedIndicesBuf[i] = shortIndices[i];
716
717
*sortedIndices = sortedIndicesBuf;
718
}
719
720
if( vi < numPrecalcVal )
721
{
722
for( int i = 0; i < nodeSampleCount; i++ )
723
{
724
int idx = (*sortedIndices)[i];
725
idx = sampleIndices[idx];
726
ordValuesBuf[i] = valCache.at<float>( vi, idx);
727
}
728
}
729
else
730
{
731
for( int i = 0; i < nodeSampleCount; i++ )
732
{
733
int idx = (*sortedIndices)[i];
734
idx = sampleIndices[idx];
735
ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
736
}
737
}
738
}
739
else // vi >= numPrecalcIdx
740
{
741
cv::AutoBuffer<float> abuf(nodeSampleCount);
742
float* sampleValues = &abuf[0];
743
744
if ( vi < numPrecalcVal )
745
{
746
for( int i = 0; i < nodeSampleCount; i++ )
747
{
748
sortedIndicesBuf[i] = i;
749
sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
750
}
751
}
752
else
753
{
754
for( int i = 0; i < nodeSampleCount; i++ )
755
{
756
sortedIndicesBuf[i] = i;
757
sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
758
}
759
}
760
std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) );
761
for( int i = 0; i < nodeSampleCount; i++ )
762
ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
763
*sortedIndices = sortedIndicesBuf;
764
}
765
766
*ordValues = ordValuesBuf;
767
}
768
769
const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
770
{
771
int nodeSampleCount = n->sample_count;
772
int* sampleIndicesBuf = catValuesBuf; //
773
const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
774
775
if ( vi < numPrecalcVal )
776
{
777
for( int i = 0; i < nodeSampleCount; i++ )
778
catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
779
}
780
else
781
{
782
if( vi >= numPrecalcVal && vi < var_count )
783
{
784
for( int i = 0; i < nodeSampleCount; i++ )
785
catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
786
}
787
else
788
{
789
get_cv_labels( n, catValuesBuf );
790
}
791
}
792
793
return catValuesBuf;
794
}
795
796
float CvCascadeBoostTrainData::getVarValue( int vi, int si )
797
{
798
if ( vi < numPrecalcVal && !valCache.empty() )
799
return valCache.at<float>( vi, si );
800
return (*featureEvaluator)( vi, si );
801
}
802
803
804
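// Parallel bodies used by precalculate(): FeatureIdxOnlyPrecalc fills only the
// sorted sample-index buffer, FeatureValAndIdxPrecalc fills both the value cache
// and the sorted indices, and FeatureValOnlyPrecalc fills the value cache alone.
// Each worker handles a contiguous range of feature indices.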
struct FeatureIdxOnlyPrecalc : ParallelLoopBody
805
{
806
FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
807
{
808
featureEvaluator = _featureEvaluator;
809
sample_count = _sample_count;
810
udst = (unsigned short*)_buf->data.s;
811
idst = _buf->data.i;
812
is_buf_16u = _is_buf_16u;
813
}
814
void operator()( const Range& range ) const
815
{
816
cv::AutoBuffer<float> valCache(sample_count);
817
float* valCachePtr = valCache.data();
818
for ( int fi = range.start; fi < range.end; fi++)
819
{
820
for( int si = 0; si < sample_count; si++ )
821
{
822
valCachePtr[si] = (*featureEvaluator)( fi, si );
823
if ( is_buf_16u )
824
*(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
825
else
826
*(idst + (size_t)fi*sample_count + si) = si;
827
}
828
if ( is_buf_16u )
829
std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) );
830
else
831
std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) );
832
}
833
}
834
const CvFeatureEvaluator* featureEvaluator;
835
int sample_count;
836
int* idst;
837
unsigned short* udst;
838
bool is_buf_16u;
839
};
840
841
struct FeatureValAndIdxPrecalc : ParallelLoopBody
842
{
843
FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
844
{
845
featureEvaluator = _featureEvaluator;
846
valCache = _valCache;
847
sample_count = _sample_count;
848
udst = (unsigned short*)_buf->data.s;
849
idst = _buf->data.i;
850
is_buf_16u = _is_buf_16u;
851
}
852
void operator()( const Range& range ) const
853
{
854
for ( int fi = range.start; fi < range.end; fi++)
855
{
856
for( int si = 0; si < sample_count; si++ )
857
{
858
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
859
if ( is_buf_16u )
860
*(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
861
else
862
*(idst + (size_t)fi*sample_count + si) = si;
863
}
864
if ( is_buf_16u )
865
std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) );
866
else
867
std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) );
868
}
869
}
870
const CvFeatureEvaluator* featureEvaluator;
871
Mat* valCache;
872
int sample_count;
873
int* idst;
874
unsigned short* udst;
875
bool is_buf_16u;
876
};
877
878
struct FeatureValOnlyPrecalc : ParallelLoopBody
879
{
880
FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
881
{
882
featureEvaluator = _featureEvaluator;
883
valCache = _valCache;
884
sample_count = _sample_count;
885
}
886
void operator()( const Range& range ) const
887
{
888
for ( int fi = range.start; fi < range.end; fi++)
889
for( int si = 0; si < sample_count; si++ )
890
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
891
}
892
const CvFeatureEvaluator* featureEvaluator;
893
Mat* valCache;
894
int sample_count;
895
};
896
897
void CvCascadeBoostTrainData::precalculate()
898
{
899
int minNum = MIN( numPrecalcVal, numPrecalcIdx);
900
901
double proctime = -TIME( 0 );
902
parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
903
FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
904
parallel_for_( Range(0, minNum),
905
FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
906
parallel_for_( Range(minNum, numPrecalcVal),
907
FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
908
cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
909
}
910
911
//-------------------------------- CascadeBoostTree ----------------------------------------
912
913
CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
914
{
915
CvDTreeNode* node = root;
916
if( !node )
917
CV_Error( CV_StsError, "The tree has not been trained yet" );
918
919
if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
920
{
921
while( node->left )
922
{
923
CvDTreeSplit* split = node->split;
924
float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
925
node = val <= split->ord.c ? node->left : node->right;
926
}
927
}
928
else // categorical
929
{
930
while( node->left )
931
{
932
CvDTreeSplit* split = node->split;
933
int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
934
node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
935
}
936
}
937
return node;
938
}
939
940
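// Writes the tree in breadth-first order: every internal node is stored as
// { left child, right child, feature index, threshold or category subset }.
// Child entries > 0 refer to later internal nodes, entries <= 0 are negated
// positions in the separate leafValues list; read() below inverts this layout.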
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
941
{
942
int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
943
int subsetN = (maxCatCount + 31)/32;
944
queue<CvDTreeNode*> internalNodesQueue;
945
int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
946
std::vector<float> leafVals(size);
947
int leafValIdx = 0;
948
int internalNodeIdx = 1;
949
CvDTreeNode* tempNode;
950
951
CV_DbgAssert( root );
952
internalNodesQueue.push( root );
953
954
fs << "{";
955
fs << CC_INTERNAL_NODES << "[:";
956
while (!internalNodesQueue.empty())
957
{
958
tempNode = internalNodesQueue.front();
959
CV_Assert( tempNode->left );
960
if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
961
{
962
leafVals[-leafValIdx] = (float)tempNode->left->value;
963
fs << leafValIdx-- ;
964
}
965
else
966
{
967
internalNodesQueue.push( tempNode->left );
968
fs << internalNodeIdx++;
969
}
970
CV_Assert( tempNode->right );
971
if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
972
{
973
leafVals[-leafValIdx] = (float)tempNode->right->value;
974
fs << leafValIdx--;
975
}
976
else
977
{
978
internalNodesQueue.push( tempNode->right );
979
fs << internalNodeIdx++;
980
}
981
int fidx = tempNode->split->var_idx;
982
fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
983
fs << fidx;
984
if ( !maxCatCount )
985
fs << tempNode->split->ord.c;
986
else
987
for( int i = 0; i < subsetN; i++ )
988
fs << tempNode->split->subset[i];
989
internalNodesQueue.pop();
990
}
991
fs << "]"; // CC_INTERNAL_NODES
992
993
fs << CC_LEAF_VALUES << "[:";
994
for (int ni = 0; ni < -leafValIdx; ni++)
995
fs << leafVals[ni];
996
fs << "]"; // CC_LEAF_VALUES
997
fs << "}";
998
}
999
1000
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
1001
CvDTreeTrainData* _data )
1002
{
1003
int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
1004
int subsetN = (maxCatCount + 31)/32;
1005
int step = 3 + ( maxCatCount>0 ? subsetN : 1 );
1006
1007
queue<CvDTreeNode*> internalNodesQueue;
1008
FileNodeIterator internalNodesIt, leafValuesIt;
1009
CvDTreeNode* prntNode, *cldNode;
1010
1011
clear();
1012
data = _data;
1013
ensemble = _ensemble;
1014
pruned_tree_idx = 0;
1015
1016
// read tree nodes
1017
FileNode rnode = node[CC_INTERNAL_NODES];
1018
internalNodesIt = rnode.end();
1019
leafValuesIt = node[CC_LEAF_VALUES].end();
1020
internalNodesIt--; leafValuesIt--;
1021
for( size_t i = 0; i < rnode.size()/step; i++ )
1022
{
1023
prntNode = data->new_node( 0, 0, 0, 0 );
1024
if ( maxCatCount > 0 )
1025
{
1026
prntNode->split = data->new_split_cat( 0, 0 );
1027
for( int j = subsetN-1; j>=0; j--)
1028
{
1029
*internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
1030
}
1031
}
1032
else
1033
{
1034
float split_value;
1035
*internalNodesIt >> split_value; internalNodesIt--;
1036
prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
1037
}
1038
*internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
1039
int ridx, lidx;
1040
*internalNodesIt >> ridx; internalNodesIt--;
1041
*internalNodesIt >> lidx;internalNodesIt--;
1042
if ( ridx <= 0)
1043
{
1044
prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
1045
*leafValuesIt >> cldNode->value; leafValuesIt--;
1046
cldNode->parent = prntNode;
1047
}
1048
else
1049
{
1050
prntNode->right = internalNodesQueue.front();
1051
prntNode->right->parent = prntNode;
1052
internalNodesQueue.pop();
1053
}
1054
1055
if ( lidx <= 0)
1056
{
1057
prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
1058
*leafValuesIt >> cldNode->value; leafValuesIt--;
1059
cldNode->parent = prntNode;
1060
}
1061
else
1062
{
1063
prntNode->left = internalNodesQueue.front();
1064
prntNode->left->parent = prntNode;
1065
internalNodesQueue.pop();
1066
}
1067
1068
internalNodesQueue.push( prntNode );
1069
}
1070
1071
root = internalNodesQueue.front();
1072
internalNodesQueue.pop();
1073
}
1074
1075
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
1076
{
1077
int n = node->sample_count, nl, nr, scount = data->sample_count;
1078
char* dir = (char*)data->direction->data.ptr;
1079
CvDTreeNode *left = 0, *right = 0;
1080
int* newIdx = data->split_buf->data.i;
1081
int newBufIdx = data->get_child_buf_idx( node );
1082
int workVarCount = data->get_work_var_count();
1083
CvMat* buf = data->buf;
1084
size_t length_buf_row = data->get_length_subbuf();
1085
cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
1086
int* tempBuf = (int*)inn_buf.data();
1087
bool splitInputData;
1088
1089
complete_node_dir(node);
1090
1091
for( int i = nl = nr = 0; i < n; i++ )
1092
{
1093
int d = dir[i];
1094
// initialize new indices for splitting ordered variables
1095
newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
1096
nr += d;
1097
nl += d^1;
1098
}
1099
1100
node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
1101
node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );
1102
1103
splitInputData = node->depth + 1 < data->params.max_depth &&
1104
(node->left->sample_count > data->params.min_sample_count ||
1105
node->right->sample_count > data->params.min_sample_count);
1106
1107
// split ordered variables, keep both halves sorted.
1108
for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
1109
{
1110
int ci = data->get_var_type(vi);
1111
if( ci >= 0 || !splitInputData )
1112
continue;
1113
1114
int n1 = node->get_num_valid(vi);
1115
float *src_val_buf = (float*)(tempBuf + n);
1116
int *src_sorted_idx_buf = (int*)(src_val_buf + n);
1117
int *src_sample_idx_buf = src_sorted_idx_buf + n;
1118
const int* src_sorted_idx = 0;
1119
const float* src_val = 0;
1120
data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
1121
1122
for(int i = 0; i < n; i++)
1123
tempBuf[i] = src_sorted_idx[i];
1124
1125
if (data->is_buf_16u)
1126
{
1127
ushort *ldst, *rdst;
1128
ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
1129
vi*scount + left->offset);
1130
rdst = (ushort*)(ldst + nl);
1131
1132
// split sorted
1133
for( int i = 0; i < n1; i++ )
1134
{
1135
int idx = tempBuf[i];
1136
int d = dir[idx];
1137
idx = newIdx[idx];
1138
if (d)
1139
{
1140
*rdst = (ushort)idx;
1141
rdst++;
1142
}
1143
else
1144
{
1145
*ldst = (ushort)idx;
1146
ldst++;
1147
}
1148
}
1149
CV_Assert( n1 == n );
1150
}
1151
else
1152
{
1153
int *ldst, *rdst;
1154
ldst = buf->data.i + left->buf_idx*length_buf_row +
1155
vi*scount + left->offset;
1156
rdst = buf->data.i + right->buf_idx*length_buf_row +
1157
vi*scount + right->offset;
1158
1159
// split sorted
1160
for( int i = 0; i < n1; i++ )
1161
{
1162
int idx = tempBuf[i];
1163
int d = dir[idx];
1164
idx = newIdx[idx];
1165
if (d)
1166
{
1167
*rdst = idx;
1168
rdst++;
1169
}
1170
else
1171
{
1172
*ldst = idx;
1173
ldst++;
1174
}
1175
}
1176
CV_Assert( n1 == n );
1177
}
1178
}
1179
1180
// split cv_labels using newIdx relocation table
1181
int *src_lbls_buf = tempBuf + n;
1182
const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
1183
1184
for(int i = 0; i < n; i++)
1185
tempBuf[i] = src_lbls[i];
1186
1187
if (data->is_buf_16u)
1188
{
1189
unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
1190
(size_t)(workVarCount-1)*scount + left->offset);
1191
unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
1192
(size_t)(workVarCount-1)*scount + right->offset);
1193
1194
for( int i = 0; i < n; i++ )
1195
{
1196
int idx = tempBuf[i];
1197
if (dir[i])
1198
{
1199
*rdst = (unsigned short)idx;
1200
rdst++;
1201
}
1202
else
1203
{
1204
*ldst = (unsigned short)idx;
1205
ldst++;
1206
}
1207
}
1208
1209
}
1210
else
1211
{
1212
int *ldst = buf->data.i + left->buf_idx*length_buf_row +
1213
(size_t)(workVarCount-1)*scount + left->offset;
1214
int *rdst = buf->data.i + right->buf_idx*length_buf_row +
1215
(size_t)(workVarCount-1)*scount + right->offset;
1216
1217
for( int i = 0; i < n; i++ )
1218
{
1219
int idx = tempBuf[i];
1220
if (dir[i])
1221
{
1222
*rdst = idx;
1223
rdst++;
1224
}
1225
else
1226
{
1227
*ldst = idx;
1228
ldst++;
1229
}
1230
}
1231
}
1232
1233
// split sample indices
1234
int *sampleIdx_src_buf = tempBuf + n;
1235
const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
1236
1237
for(int i = 0; i < n; i++)
1238
tempBuf[i] = sampleIdx_src[i];
1239
1240
if (data->is_buf_16u)
1241
{
1242
unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
1243
(size_t)workVarCount*scount + left->offset);
1244
unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
1245
(size_t)workVarCount*scount + right->offset);
1246
for (int i = 0; i < n; i++)
1247
{
1248
unsigned short idx = (unsigned short)tempBuf[i];
1249
if (dir[i])
1250
{
1251
*rdst = idx;
1252
rdst++;
1253
}
1254
else
1255
{
1256
*ldst = idx;
1257
ldst++;
1258
}
1259
}
1260
}
1261
else
1262
{
1263
int* ldst = buf->data.i + left->buf_idx*length_buf_row +
1264
(size_t)workVarCount*scount + left->offset;
1265
int* rdst = buf->data.i + right->buf_idx*length_buf_row +
1266
(size_t)workVarCount*scount + right->offset;
1267
for (int i = 0; i < n; i++)
1268
{
1269
int idx = tempBuf[i];
1270
if (dir[i])
1271
{
1272
*rdst = idx;
1273
rdst++;
1274
}
1275
else
1276
{
1277
*ldst = idx;
1278
ldst++;
1279
}
1280
}
1281
}
1282
1283
for( int vi = 0; vi < data->var_count; vi++ )
1284
{
1285
left->set_num_valid(vi, (int)(nl));
1286
right->set_num_valid(vi, (int)(nr));
1287
}
1288
1289
// deallocate the parent node data that is not needed anymore
1290
data->free_node_data(node);
1291
}
1292
1293
static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
1294
{
1295
if ( node && node->split )
1296
{
1297
featureMap.ptr<int>(0)[node->split->var_idx] = 1;
1298
auxMarkFeaturesInMap( node->left, featureMap );
1299
auxMarkFeaturesInMap( node->right, featureMap );
1300
}
1301
}
1302
1303
void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
1304
{
1305
auxMarkFeaturesInMap( root, featureMap );
1306
}
1307
1308
//----------------------------------- CascadeBoost --------------------------------------
1309
1310
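// Trains a single cascade stage: weak trees are grown one at a time on the
// boosted sample weights, with update_weights()/trim_weights() after each tree,
// until isErrDesired() reports the stage meets maxFalseAlarm (at >= minHitRate)
// or params.weak_count trees have been added.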
bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
1311
int _numSamples,
1312
int _precalcValBufSize, int _precalcIdxBufSize,
1313
const CvCascadeBoostParams& _params )
1314
{
1315
bool isTrained = false;
1316
CV_Assert( !data );
1317
clear();
1318
data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
1319
_precalcValBufSize, _precalcIdxBufSize, _params );
1320
CvMemStorage *storage = cvCreateMemStorage();
1321
weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1322
storage = 0;
1323
1324
set_params( _params );
1325
if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
1326
data->do_responses_copy();
1327
1328
update_weights( 0 );
1329
1330
cout << "+----+---------+---------+" << endl;
1331
cout << "| N | HR | FA |" << endl;
1332
cout << "+----+---------+---------+" << endl;
1333
1334
do
1335
{
1336
CvCascadeBoostTree* tree = new CvCascadeBoostTree;
1337
if( !tree->train( data, subsample_mask, this ) )
1338
{
1339
delete tree;
1340
break;
1341
}
1342
cvSeqPush( weak, &tree );
1343
update_weights( tree );
1344
trim_weights();
1345
if( cvCountNonZero(subsample_mask) == 0 )
1346
break;
1347
}
1348
while( !isErrDesired() && (weak->total < params.weak_count) );
1349
1350
if(weak->total > 0)
1351
{
1352
data->is_classifier = true;
1353
data->free_train_data();
1354
isTrained = true;
1355
}
1356
else
1357
clear();
1358
1359
return isTrained;
1360
}
1361
1362
float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
1363
{
1364
CV_Assert( weak );
1365
double sum = 0;
1366
CvSeqReader reader;
1367
cvStartReadSeq( weak, &reader );
1368
cvSetSeqReaderPos( &reader, 0 );
1369
for( int i = 0; i < weak->total; i++ )
1370
{
1371
CvBoostTree* wtree;
1372
CV_READ_SEQ_ELEM( wtree, reader );
1373
sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
1374
}
1375
if( !returnSum )
1376
sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
1377
return (float)sum;
1378
}
1379
1380
bool CvCascadeBoost::set_params( const CvBoostParams& _params )
1381
{
1382
minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
1383
maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
1384
return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
1385
( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
1386
CvBoost::set_params( _params ));
1387
}
1388
1389
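// With tree == 0 this initializes the boosting state (responses in {-1,1},
// uniform weights, all-active sample mask); otherwise it applies the Discrete,
// Real, Logit or Gentle AdaBoost weight update described in the inline comments
// below and renormalizes the weights to sum to 1.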
void CvCascadeBoost::update_weights( CvBoostTree* tree )
1390
{
1391
int n = data->sample_count;
1392
double sumW = 0.;
1393
int step = 0;
1394
float* fdata = 0;
1395
int *sampleIdxBuf;
1396
const int* sampleIdx = 0;
1397
int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
1398
( !tree ? n*sizeof(int) : 0 );
1399
cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
1400
uchar* cur_inn_buf_pos = inn_buf.data();
1401
if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
1402
{
1403
step = CV_IS_MAT_CONT(data->responses_copy->type) ?
1404
1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
1405
fdata = data->responses_copy->data.fl;
1406
sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
1407
sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
1408
}
1409
CvMat* buf = data->buf;
1410
size_t length_buf_row = data->get_length_subbuf();
1411
if( !tree ) // before training the first tree, initialize weights and other parameters
1412
{
1413
int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
1414
const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
1415
// in case of logitboost and gentle adaboost each weak tree is a regression tree,
1416
// so we need to convert class labels to floating-point values
1417
double w0 = 1./n;
1418
double p[2] = { 1, 1 };
1419
1420
cvReleaseMat( &orig_response );
1421
cvReleaseMat( &sum_response );
1422
cvReleaseMat( &weak_eval );
1423
cvReleaseMat( &subsample_mask );
1424
cvReleaseMat( &weights );
1425
1426
orig_response = cvCreateMat( 1, n, CV_32S );
1427
weak_eval = cvCreateMat( 1, n, CV_64F );
1428
subsample_mask = cvCreateMat( 1, n, CV_8U );
1429
weights = cvCreateMat( 1, n, CV_64F );
1430
subtree_weights = cvCreateMat( 1, n + 2, CV_64F );
1431
1432
if (data->is_buf_16u)
1433
{
1434
unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
1435
data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
1436
for( int i = 0; i < n; i++ )
1437
{
1438
// save original categorical responses {0,1}, convert them to {-1,1}
1439
orig_response->data.i[i] = classLabels[i]*2 - 1;
1440
// make all the samples active at start.
1441
// later, in trim_weights(), some of them are deactivated/reactivated if needed
1442
subsample_mask->data.ptr[i] = (uchar)1;
1443
// make all the initial weights the same.
1444
weights->data.db[i] = w0*p[classLabels[i]];
1445
// set the labels to find (from within weak tree learning proc)
1446
// the particular sample weight, and where to store the response.
1447
labels[i] = (unsigned short)i;
1448
}
1449
}
1450
else
1451
{
1452
int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
1453
data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;
1454
1455
for( int i = 0; i < n; i++ )
1456
{
1457
// save original categorical responses {0,1}, convert them to {-1,1}
1458
orig_response->data.i[i] = classLabels[i]*2 - 1;
1459
subsample_mask->data.ptr[i] = (uchar)1;
1460
weights->data.db[i] = w0*p[classLabels[i]];
1461
labels[i] = i;
1462
}
1463
}
1464
1465
if( params.boost_type == LOGIT )
1466
{
1467
sum_response = cvCreateMat( 1, n, CV_64F );
1468
1469
for( int i = 0; i < n; i++ )
1470
{
1471
sum_response->data.db[i] = 0;
1472
fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
1473
}
1474
1475
// in case of logitboost each weak tree is a regression tree.
1476
// the target function values are recalculated for each of the trees
1477
data->is_classifier = false;
1478
}
1479
else if( params.boost_type == GENTLE )
1480
{
1481
for( int i = 0; i < n; i++ )
1482
fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];
1483
1484
data->is_classifier = false;
1485
}
1486
}
1487
else
1488
{
1489
// at this moment, for all the samples that participated in the training of the most
1490
// recent weak classifier we know the responses. For other samples we need to compute them
1491
if( have_subsample )
1492
{
1493
// invert the subsample mask
1494
cvXorS( subsample_mask, cvScalar(1.), subsample_mask );
1495
1496
// run tree through all the non-processed samples
1497
for( int i = 0; i < n; i++ )
1498
if( subsample_mask->data.ptr[i] )
1499
{
1500
weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
1501
}
1502
}
1503
1504
// now update weights and other parameters for each type of boosting
1505
if( params.boost_type == DISCRETE )
1506
{
1507
// Discrete AdaBoost:
1508
// weak_eval[i] (=f(x_i)) is in {-1,1}
1509
// err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
1510
// C = log((1-err)/err)
1511
// w_i *= exp(C*(f(x_i) != y_i))
1512
1513
double C, err = 0.;
1514
double scale[] = { 1., 0. };
1515
1516
for( int i = 0; i < n; i++ )
1517
{
1518
double w = weights->data.db[i];
1519
sumW += w;
1520
err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
1521
}
1522
1523
if( sumW != 0 )
1524
err /= sumW;
1525
C = err = -logRatio( err );
1526
scale[1] = exp(err);
1527
1528
sumW = 0;
1529
for( int i = 0; i < n; i++ )
1530
{
1531
double w = weights->data.db[i]*
1532
scale[weak_eval->data.db[i] != orig_response->data.i[i]];
1533
sumW += w;
1534
weights->data.db[i] = w;
1535
}
1536
1537
tree->scale( C );
1538
}
1539
else if( params.boost_type == REAL )
1540
{
1541
// Real AdaBoost:
1542
// weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
1543
// w_i *= exp(-y_i*f(x_i))
1544
1545
for( int i = 0; i < n; i++ )
1546
weak_eval->data.db[i] *= -orig_response->data.i[i];
1547
1548
cvExp( weak_eval, weak_eval );
1549
1550
for( int i = 0; i < n; i++ )
1551
{
1552
double w = weights->data.db[i]*weak_eval->data.db[i];
1553
sumW += w;
1554
weights->data.db[i] = w;
1555
}
1556
}
1557
else if( params.boost_type == LOGIT )
1558
{
1559
// LogitBoost:
1560
// weak_eval[i] = f(x_i) in [-z_max,z_max]
1561
// sum_response = F(x_i).
1562
// F(x_i) += 0.5*f(x_i)
1563
// p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
1564
// reuse weak_eval: weak_eval[i] <- p(x_i)
1565
// w_i = p(x_i)*(1 - p(x_i))
1566
// z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
1567
// store z_i to the data->data_root as the new target responses
1568
1569
const double lbWeightThresh = FLT_EPSILON;
1570
const double lbZMax = 10.;
1571
1572
for( int i = 0; i < n; i++ )
1573
{
1574
double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
1575
sum_response->data.db[i] = s;
1576
weak_eval->data.db[i] = -2*s;
1577
}
1578
1579
cvExp( weak_eval, weak_eval );
1580
1581
for( int i = 0; i < n; i++ )
1582
{
1583
double p = 1./(1. + weak_eval->data.db[i]);
1584
double w = p*(1 - p), z;
1585
w = MAX( w, lbWeightThresh );
1586
weights->data.db[i] = w;
1587
sumW += w;
1588
if( orig_response->data.i[i] > 0 )
1589
{
1590
z = 1./p;
1591
fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
1592
}
1593
else
1594
{
1595
z = 1./(1-p);
1596
fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
1597
}
1598
}
1599
}
1600
else
1601
{
1602
// Gentle AdaBoost:
1603
// weak_eval[i] = f(x_i) in [-1,1]
1604
// w_i *= exp(-y_i*f(x_i))
1605
assert( params.boost_type == GENTLE );
1606
1607
for( int i = 0; i < n; i++ )
1608
weak_eval->data.db[i] *= -orig_response->data.i[i];
1609
1610
cvExp( weak_eval, weak_eval );
1611
1612
for( int i = 0; i < n; i++ )
1613
{
1614
double w = weights->data.db[i] * weak_eval->data.db[i];
1615
weights->data.db[i] = w;
1616
sumW += w;
1617
}
1618
}
1619
}
1620
1621
// renormalize weights
1622
if( sumW > FLT_EPSILON )
1623
{
1624
sumW = 1./sumW;
1625
for( int i = 0; i < n; ++i )
1626
weights->data.db[i] *= sumW;
1627
}
1628
}
1629
1630
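// Sorts the boosted sums of the positive samples to pick the stage threshold
// that keeps at least minHitRate of them, then returns whether the resulting
// false-alarm rate on the negative samples is already <= maxFalseAlarm.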
bool CvCascadeBoost::isErrDesired()
1631
{
1632
int sCount = data->sample_count,
1633
numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
1634
vector<float> eval(sCount);
1635
1636
for( int i = 0; i < sCount; i++ )
1637
if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
1638
eval[numPos++] = predict( i, true );
1639
1640
std::sort(&eval[0], &eval[0] + numPos);
1641
1642
int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
1643
1644
threshold = eval[ thresholdIdx ];
1645
numPosTrue = numPos - thresholdIdx;
1646
for( int i = thresholdIdx - 1; i >= 0; i--)
1647
if ( abs( eval[i] - threshold) < FLT_EPSILON )
1648
numPosTrue++;
1649
float hitRate = ((float) numPosTrue) / ((float) numPos);
1650
1651
for( int i = 0; i < sCount; i++ )
1652
{
1653
if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
1654
{
1655
numNeg++;
1656
if( predict( i ) )
1657
numFalse++;
1658
}
1659
}
1660
float falseAlarm = ((float) numFalse) / ((float) numNeg);
1661
1662
cout << "|"; cout.width(4); cout << right << weak->total;
1663
cout << "|"; cout.width(9); cout << right << hitRate;
1664
cout << "|"; cout.width(9); cout << right << falseAlarm;
1665
cout << "|" << endl;
1666
cout << "+----+---------+---------+" << endl;
1667
1668
return falseAlarm <= maxFalseAlarm;
1669
}
1670
1671
void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
1672
{
1673
// char cmnt[30];
1674
CvCascadeBoostTree* weakTree;
1675
fs << CC_WEAK_COUNT << weak->total;
1676
fs << CC_STAGE_THRESHOLD << threshold;
1677
fs << CC_WEAK_CLASSIFIERS << "[";
1678
for( int wi = 0; wi < weak->total; wi++)
1679
{
1680
/*sprintf( cmnt, "tree %i", wi );
1681
cvWriteComment( fs, cmnt, 0 );*/
1682
weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1683
weakTree->write( fs, featureMap );
1684
}
1685
fs << "]";
1686
}
1687
1688
bool CvCascadeBoost::read( const FileNode &node,
1689
const CvFeatureEvaluator* _featureEvaluator,
1690
const CvCascadeBoostParams& _params )
1691
{
1692
CvMemStorage* storage;
1693
clear();
1694
data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
1695
set_params( _params );
1696
1697
node[CC_STAGE_THRESHOLD] >> threshold;
1698
FileNode rnode = node[CC_WEAK_CLASSIFIERS];
1699
1700
storage = cvCreateMemStorage();
1701
weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1702
for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
1703
{
1704
CvCascadeBoostTree* tree = new CvCascadeBoostTree();
1705
tree->read( *it, this, data );
1706
cvSeqPush( weak, &tree );
1707
}
1708
return true;
1709
}
1710
1711
void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
1712
{
1713
for( int wi = 0; wi < weak->total; wi++ )
1714
{
1715
CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1716
weakTree->markFeaturesInMap( featureMap );
1717
}
1718
}
1719
1720