Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/apps/traincascade/old_ml_boost.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// Intel License Agreement
11
//
12
// Copyright (C) 2000, Intel Corporation, all rights reserved.
13
// Third party copyrights are property of their respective owners.
14
//
15
// Redistribution and use in source and binary forms, with or without modification,
16
// are permitted provided that the following conditions are met:
17
//
18
// * Redistribution's of source code must retain the above copyright notice,
19
// this list of conditions and the following disclaimer.
20
//
21
// * Redistribution's in binary form must reproduce the above copyright notice,
22
// this list of conditions and the following disclaimer in the documentation
23
// and/or other materials provided with the distribution.
24
//
25
// * The name of Intel Corporation may not be used to endorse or promote products
26
// derived from this software without specific prior written permission.
27
//
28
// This software is provided by the copyright holders and contributors "as is" and
29
// any express or implied warranties, including, but not limited to, the implied
30
// warranties of merchantability and fitness for a particular purpose are disclaimed.
31
// In no event shall the Intel Corporation or contributors be liable for any direct,
32
// indirect, incidental, special, exemplary, or consequential damages
33
// (including, but not limited to, procurement of substitute goods or services;
34
// loss of use, data, or profits; or business interruption) however caused
35
// and on any theory of liability, whether in contract, strict liability,
36
// or tort (including negligence or otherwise) arising in any way out of
37
// the use of this software, even if advised of the possibility of such damage.
38
//
39
//M*/
40
41
#include "old_ml_precomp.hpp"

#include <algorithm>
#include <cmath>
42
43
/*
 * Clamped log-odds transform: log(p/(1-p)) with p clipped to
 * [eps, 1-eps] so the result stays finite for p near 0 or 1.
 * Used by Real AdaBoost (see calc_node_value) to turn a class
 * probability into a node value.
 */
static inline double
log_ratio( double val )
{
    const double eps = 1e-5;

    // std::max/std::min instead of the MAX/MIN macros: identical result,
    // but no double evaluation of the arguments.
    val = std::max( val, eps );
    val = std::min( val, 1. - eps );
    return log( val/(1. - val) );
}
52
53
54
/*
 * Default boosting parameters: Real AdaBoost, 100 weak learners,
 * 5% weight trimming, no cross-validation pruning, stumps (depth 1).
 */
CvBoostParams::CvBoostParams()
{
    boost_type = CvBoost::REAL;
    weak_count = 100;
    weight_trim_rate = 0.95;
    // Previously left uninitialized by this ctor; the parameterized ctor
    // sets it to DEFAULT, so do the same here (set_params() later resolves
    // DEFAULT to GINI/MISCLASS/SQERR according to boost_type).
    split_criteria = CvBoost::DEFAULT;
    cv_folds = 0;
    max_depth = 1;
}
62
63
64
/*
 * Explicit boosting parameters.
 * split_criteria always starts as CvBoost::DEFAULT (resolved later by
 * CvBoost::set_params) and cv_folds is fixed at 0 — cross-validation
 * pruning is not used for boosted trees.
 */
CvBoostParams::CvBoostParams( int _boost_type, int _weak_count,
                              double _weight_trim_rate, int _max_depth,
                              bool _use_surrogates, const float* _priors )
{
    // boosting-specific settings
    boost_type = _boost_type;
    weak_count = _weak_count;
    weight_trim_rate = _weight_trim_rate;
    split_criteria = CvBoost::DEFAULT;

    // settings of the underlying decision trees
    max_depth = _max_depth;
    use_surrogates = _use_surrogates;
    priors = _priors;
    cv_folds = 0;
}
77
78
79
80
///////////////////////////////// CvBoostTree ///////////////////////////////////
81
82
// A weak tree is not attached to any boosting ensemble until train() is called.
CvBoostTree::CvBoostTree()
{
    ensemble = 0;
}
86
87
88
// clear() releases the tree and detaches the (non-owning) ensemble pointer.
CvBoostTree::~CvBoostTree()
{
    clear();
}
92
93
94
// Free the tree structure, then drop the back-reference to the ensemble
// (the ensemble itself is owned elsewhere and is not released here).
void
CvBoostTree::clear()
{
    CvDTree::clear();
    ensemble = 0;
}
100
101
102
// Train one weak tree on a subsample of the ensemble's shared training data.
// _subsample_idx selects the samples, _ensemble is the owning booster.
bool
CvBoostTree::train( CvDTreeTrainData* _train_data,
                    const CvMat* _subsample_idx, CvBoost* _ensemble )
{
    clear();
    ensemble = _ensemble;
    data = _train_data;
    // the training data is shared between all weak trees of the ensemble,
    // so the tree must not free it on clear()
    data->shared = true;
    return do_train( _subsample_idx );
}
112
113
114
// Standalone CvDTree-style training is not supported for a weak boosted tree;
// use train(CvDTreeTrainData*, const CvMat*, CvBoost*) instead.
bool
CvBoostTree::train( const CvMat*, int, const CvMat*, const CvMat*,
                    const CvMat*, const CvMat*, const CvMat*, CvDTreeParams )
{
    assert(0);
    return false;
}
121
122
123
// Training without an owning ensemble is not supported either;
// this overload exists only to hide the CvDTree base version.
bool
CvBoostTree::train( CvDTreeTrainData*, const CvMat* )
{
    assert(0);
    return false;
}
129
130
131
// Multiply the value of every node in the tree by _scale.
// Used by the ensemble to apply the shrinkage/weight of a weak learner.
void
CvBoostTree::scale( double _scale )
{
    CvDTreeNode* node = root;

    // traverse the tree and scale all the node values
    // (iterative depth-first walk using parent pointers — no extra stack)
    for(;;)
    {
        CvDTreeNode* parent;
        // descend along the left spine, scaling every node on the way
        for(;;)
        {
            node->value *= _scale;
            if( !node->left )
                break;
            node = node->left;
        }

        // climb up while we are a right child (subtree fully processed)
        for( parent = node->parent; parent && parent->right == node;
             node = parent, parent = parent->parent )
            ;

        // climbed past the root: the whole tree has been scaled
        if( !parent )
            break;

        // switch to the right sibling subtree
        node = parent->right;
    }
}
158
159
160
// Standard CvDTree node splitting, plus boosting bookkeeping: when a node
// ends up being a leaf, its value is written into the ensemble's
// weak-response vector for every training sample that reached it
// (indexed through the cv_labels of the node's samples).
void
CvBoostTree::try_split_node( CvDTreeNode* node )
{
    CvDTree::try_split_node( node );

    if( !node->left )
    {
        // if the node has not been split,
        // store the responses for the corresponding training samples
        double* weak_eval = ensemble->get_weak_response()->data.db;
        cv::AutoBuffer<int> inn_buf(node->sample_count);
        const int* labels = data->get_cv_labels(node, inn_buf.data());
        int i, count = node->sample_count;
        double value = node->value;

        for( i = 0; i < count; i++ )
            weak_eval[labels[i]] = value;
    }
}
179
180
181
/*
 * Assign a direction (-1 left, +1 right, 0 missing/undirected) to every
 * sample of the node according to the node's primary split, writing the
 * result into data->direction. Accumulates the boosting weight sent left
 * (L) and right (R), stores max(L,R) in node->maxlr (used later as the
 * baseline for surrogate splits) and returns the split quality normalized
 * by the total directed weight.
 */
double
CvBoostTree::calc_node_dir( CvDTreeNode* node )
{
    char* dir = (char*)data->direction->data.ptr;
    const double* weights = ensemble->get_subtree_weights()->data.db;
    int i, n = node->sample_count, vi = node->split->var_idx;
    double L, R;

    assert( !node->split->inversed );

    if( data->get_var_type(vi) >= 0 ) // split on categorical var
    {
        cv::AutoBuffer<int> inn_buf(n);
        const int* cat_labels = data->get_cat_var_data(node, vi, inn_buf.data());
        const int* subset = node->split->subset;
        double sum = 0, sum_abs = 0;

        for( i = 0; i < n; i++ )
        {
            // 65535 in a 16-bit buffer marks a missing category -> idx = -1,
            // which makes d = 0 (sample not sent to either side)
            int idx = ((cat_labels[i] == 65535) && data->is_buf_16u) ? -1 : cat_labels[i];
            double w = weights[i];
            int d = idx >= 0 ? CV_DTREE_CAT_DIR(idx,subset) : 0;
            // sum = R - L, sum_abs = R + L (d is -1/0/+1, d & 1 is 1 for +/-1)
            sum += d*w; sum_abs += (d & 1)*w;
            dir[i] = (char)d;
        }

        R = (sum_abs + sum) * 0.5;
        L = (sum_abs - sum) * 0.5;
    }
    else // split on ordered var
    {
        // layout of inn_buf: n floats (values) + 2n ints (sorted + sample indices)
        cv::AutoBuffer<uchar> inn_buf(2*n*sizeof(int)+n*sizeof(float));
        float* values_buf = (float*)inn_buf.data();
        int* sorted_indices_buf = (int*)(values_buf + n);
        int* sample_indices_buf = sorted_indices_buf + n;
        const float* values = 0;
        const int* sorted_indices = 0;
        data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
        int split_point = node->split->ord.split_point;
        int n1 = node->get_num_valid(vi);

        assert( 0 <= split_point && split_point < n1-1 );
        L = R = 0;

        // samples at or below the split point go left
        for( i = 0; i <= split_point; i++ )
        {
            int idx = sorted_indices[i];
            double w = weights[idx];
            dir[idx] = (char)-1;
            L += w;
        }

        // remaining valid samples go right
        for( ; i < n1; i++ )
        {
            int idx = sorted_indices[i];
            double w = weights[idx];
            dir[idx] = (char)1;
            R += w;
        }

        // samples with a missing value of this variable stay undirected
        for( ; i < n; i++ )
            dir[sorted_indices[i]] = (char)0;
    }

    node->maxlr = MAX( L, R );
    return node->split->quality/(L + R);
}
248
249
250
/*
 * Find the best threshold split on ordered variable vi for a 2-class node,
 * using the boosting weights. The criterion is either weighted Gini or
 * weighted misclassification count, chosen from the ensemble parameters
 * (MISCLASS for Discrete AdaBoost, GINI otherwise).
 * Returns a new/updated split if one beats init_quality, else 0.
 * _ext_buf, if given, must hold n*(3*sizeof(int)+sizeof(float)) bytes.
 */
CvDTreeSplit*
CvBoostTree::find_split_ord_class( CvDTreeNode* node, int vi, float init_quality,
                                   CvDTreeSplit* _split, uchar* _ext_buf )
{
    const float epsilon = FLT_EPSILON*2;

    const double* weights = ensemble->get_subtree_weights()->data.db;
    int n = node->sample_count;
    int n1 = node->get_num_valid(vi);

    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(n*(3*sizeof(int)+sizeof(float)));
    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    float* values_buf = (float*)ext_buf;
    int* sorted_indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = sorted_indices_buf + n;
    const float* values = 0;
    const int* sorted_indices = 0;
    data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
    int* responses_buf = sorted_indices_buf + n;
    const int* responses = data->get_class_labels( node, responses_buf );
    // per-class total weights of the node, stored by calc_node_value
    // at positions n and n+1 of the subtree-weights buffer
    const double* rcw0 = weights + n;
    double lcw[2] = {0,0}, rcw[2];
    int i, best_i = -1;
    double best_val = init_quality;
    int boost_type = ensemble->get_params().boost_type;
    int split_criteria = ensemble->get_params().split_criteria;

    rcw[0] = rcw0[0]; rcw[1] = rcw0[1];
    // exclude samples with a missing value of vi from the right-side totals
    for( i = n1; i < n; i++ )
    {
        int idx = sorted_indices[i];
        double w = weights[idx];
        rcw[responses[idx]] -= w;
    }

    if( split_criteria != CvBoost::GINI && split_criteria != CvBoost::MISCLASS )
        split_criteria = boost_type == CvBoost::DISCRETE ? CvBoost::MISCLASS : CvBoost::GINI;

    if( split_criteria == CvBoost::GINI )
    {
        double L = 0, R = rcw[0] + rcw[1];
        double lsum2 = 0, rsum2 = rcw[0]*rcw[0] + rcw[1]*rcw[1];

        // sweep the threshold left to right, moving one sample at a time
        // and updating the sums of squared class weights incrementally
        for( i = 0; i < n1 - 1; i++ )
        {
            int idx = sorted_indices[i];
            double w = weights[idx], w2 = w*w;
            double lv, rv;
            idx = responses[idx];
            L += w; R -= w;
            lv = lcw[idx]; rv = rcw[idx];
            // (x+w)^2 = x^2 + 2xw + w^2; (x-w)^2 = x^2 - 2xw + w^2
            lsum2 += 2*lv*w + w2;
            rsum2 -= 2*rv*w - w2;
            lcw[idx] = lv + w; rcw[idx] = rv - w;

            // a split is only possible between two distinct values
            if( values[i] + epsilon < values[i+1] )
            {
                double val = (lsum2*R + rsum2*L)/(L*R);
                if( best_val < val )
                {
                    best_val = val;
                    best_i = i;
                }
            }
        }
    }
    else
    {
        // MISCLASS criterion: maximize the weight of correctly classified
        // samples for the better of the two possible label assignments
        for( i = 0; i < n1 - 1; i++ )
        {
            int idx = sorted_indices[i];
            double w = weights[idx];
            idx = responses[idx];
            lcw[idx] += w;
            rcw[idx] -= w;

            if( values[i] + epsilon < values[i+1] )
            {
                double val = lcw[0] + rcw[1], val2 = lcw[1] + rcw[0];
                val = MAX(val, val2);
                if( best_val < val )
                {
                    best_val = val;
                    best_i = i;
                }
            }
        }
    }

    CvDTreeSplit* split = 0;
    if( best_i >= 0 )
    {
        split = _split ? _split : data->new_split_ord( 0, 0.0f, 0, 0, 0.0f );
        split->var_idx = vi;
        // threshold halfway between the two neighboring distinct values
        split->ord.c = (values[best_i] + values[best_i+1])*0.5f;
        split->ord.split_point = best_i;
        split->inversed = 0;
        split->quality = (float)best_val;
    }
    return split;
}
353
354
// Comparator that orders pointers by the values they point to
// (used to sort category rows indirectly without moving them).
template<typename T>
struct LessThanPtr
{
    bool operator()(T* a, T* b) const
    {
        return *a < *b;
    }
};
360
361
/*
 * Find the best subset split on categorical variable vi for a 2-class node,
 * using the boosting weights. Per-category class-weight totals c_{jk} are
 * built, categories are sorted by their class-1 weight, and the best prefix
 * of that ordering is taken as the left subset (optimal for 2 classes).
 * The criterion is weighted Gini or weighted misclassification, chosen from
 * the ensemble parameters. Returns a new/updated split if one beats
 * init_quality, else 0.
 * _ext_buf, if given, must hold 2*n*sizeof(int) bytes.
 */
CvDTreeSplit*
CvBoostTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
    int ci = data->get_var_type(vi);
    int n = node->sample_count;
    int mi = data->cat_count->data.i[ci];

    // base part: (mi+1) rows of 2 doubles (c_{jk}, incl. the "missing" row
    // at index -1) + alignment slack + mi row pointers
    int base_size = (2*mi+3)*sizeof(double) + mi*sizeof(double*);
    // was a duplicated copy of the base_size expression; use the named
    // variable, matching find_split_cat_reg (same value, clearer intent)
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate( base_size + 2*n*sizeof(int) );
    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;

    int* cat_labels_buf = (int*)ext_buf;
    const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);
    int* responses_buf = cat_labels_buf + n;
    const int* responses = data->get_class_labels(node, responses_buf);
    double lcw[2]={0,0}, rcw[2]={0,0};

    // cjk points to row 0; row -1 (cjk[-2], cjk[-1]) holds missing values
    double* cjk = (double*)cv::alignPtr(base_buf,sizeof(double))+2;
    const double* weights = ensemble->get_subtree_weights()->data.db;
    double** dbl_ptr = (double**)(cjk + 2*mi);
    int i, j, k, idx;
    double L = 0, R;
    double best_val = init_quality;
    int best_subset = -1, subset_i;
    int boost_type = ensemble->get_params().boost_type;
    int split_criteria = ensemble->get_params().split_criteria;

    // init array of counters:
    // c_{jk} - number of samples that have vi-th input variable = j and response = k.
    for( j = -1; j < mi; j++ )
        cjk[j*2] = cjk[j*2+1] = 0;

    for( i = 0; i < n; i++ )
    {
        double w = weights[i];
        // 65535 in a 16-bit buffer marks a missing category -> row -1
        j = ((cat_labels[i] == 65535) && data->is_buf_16u) ? -1 : cat_labels[i];
        k = responses[i];
        cjk[j*2 + k] += w;
    }

    for( j = 0; j < mi; j++ )
    {
        rcw[0] += cjk[j*2];
        rcw[1] += cjk[j*2+1];
        dbl_ptr[j] = cjk + j*2 + 1;
    }

    R = rcw[0] + rcw[1];

    if( split_criteria != CvBoost::GINI && split_criteria != CvBoost::MISCLASS )
        split_criteria = boost_type == CvBoost::DISCRETE ? CvBoost::MISCLASS : CvBoost::GINI;

    // sort rows of c_jk by increasing c_j,1
    // (i.e. by the weight of samples in j-th category that belong to class 1)
    std::sort(dbl_ptr, dbl_ptr + mi, LessThanPtr<double>());

    // grow the left subset one category at a time in the sorted order
    // and keep the best prefix
    for( subset_i = 0; subset_i < mi-1; subset_i++ )
    {
        idx = (int)(dbl_ptr[subset_i] - cjk)/2;
        const double* crow = cjk + idx*2;
        double w0 = crow[0], w1 = crow[1];
        double weight = w0 + w1;

        // skip empty categories
        if( weight < FLT_EPSILON )
            continue;

        lcw[0] += w0; rcw[0] -= w0;
        lcw[1] += w1; rcw[1] -= w1;

        if( split_criteria == CvBoost::GINI )
        {
            double lsum2 = lcw[0]*lcw[0] + lcw[1]*lcw[1];
            double rsum2 = rcw[0]*rcw[0] + rcw[1]*rcw[1];

            L += weight;
            R -= weight;

            if( L > FLT_EPSILON && R > FLT_EPSILON )
            {
                double val = (lsum2*R + rsum2*L)/(L*R);
                if( best_val < val )
                {
                    best_val = val;
                    best_subset = subset_i;
                }
            }
        }
        else
        {
            // MISCLASS: weight of correctly classified samples for the
            // better of the two label assignments
            double val = lcw[0] + rcw[1];
            double val2 = lcw[1] + rcw[0];

            val = MAX(val, val2);
            if( best_val < val )
            {
                best_val = val;
                best_subset = subset_i;
            }
        }
    }

    CvDTreeSplit* split = 0;
    if( best_subset >= 0 )
    {
        split = _split ? _split : data->new_split_cat( 0, -1.0f);
        split->var_idx = vi;
        split->quality = (float)best_val;
        memset( split->subset, 0, (data->max_c_count + 31)/32 * sizeof(int));
        // mark every category of the winning prefix in the subset bit mask
        for( i = 0; i <= best_subset; i++ )
        {
            idx = (int)(dbl_ptr[i] - cjk) >> 1;
            split->subset[idx >> 5] |= 1 << (idx & 31);
        }
    }
    return split;
}
480
481
482
/*
 * Find the best threshold split on ordered variable vi for a regression
 * node (used by LogitBoost/Gentle AdaBoost), maximizing the weighted
 * between-group criterion (lsum^2*R + rsum^2*L)/(L*R).
 * Returns a new/updated split if one beats init_quality, else 0.
 * _ext_buf, if given, must hold 2*n*(sizeof(int)+sizeof(float)) bytes.
 */
CvDTreeSplit*
CvBoostTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
    const float epsilon = FLT_EPSILON*2;
    const double* weights = ensemble->get_subtree_weights()->data.db;
    int n = node->sample_count;
    int n1 = node->get_num_valid(vi);

    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(2*n*(sizeof(int)+sizeof(float)));
    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();

    float* values_buf = (float*)ext_buf;
    int* indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = indices_buf + n;
    const float* values = 0;
    const int* indices = 0;
    data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices, sample_indices_buf );
    float* responses_buf = (float*)(indices_buf + n);
    const float* responses = data->get_ord_responses( node, responses_buf, sample_indices_buf );

    int i, best_i = -1;
    // weights[n] holds the node's total weight (stored by calc_node_value);
    // rsum starts as the total weighted response: node->value * total weight
    double L = 0, R = weights[n];
    double best_val = init_quality, lsum = 0, rsum = node->value*R;

    // compensate for missing values
    for( i = n1; i < n; i++ )
    {
        int idx = indices[i];
        double w = weights[idx];
        rsum -= responses[idx]*w;
        R -= w;
    }

    // find the optimal split
    for( i = 0; i < n1 - 1; i++ )
    {
        int idx = indices[i];
        double w = weights[idx];
        double t = responses[idx]*w;
        L += w; R -= w;
        lsum += t; rsum -= t;

        // a split is only possible between two distinct values
        if( values[i] + epsilon < values[i+1] )
        {
            double val = (lsum*lsum*R + rsum*rsum*L)/(L*R);
            if( best_val < val )
            {
                best_val = val;
                best_i = i;
            }
        }
    }

    CvDTreeSplit* split = 0;
    if( best_i >= 0 )
    {
        split = _split ? _split : data->new_split_ord( 0, 0.0f, 0, 0, 0.0f );
        split->var_idx = vi;
        // threshold halfway between the two neighboring distinct values
        split->ord.c = (values[best_i] + values[best_i+1])*0.5f;
        split->ord.split_point = best_i;
        split->inversed = 0;
        split->quality = (float)best_val;
    }
    return split;
}
549
550
551
/*
 * Find the best subset split on categorical variable vi for a regression
 * node. Categories are ordered by their (weighted) mean response; the best
 * prefix of that ordering forms the left subset — optimal for the
 * sum-of-squares criterion. Returns a new/updated split if one beats
 * init_quality, else 0.
 * _ext_buf, if given, must hold n*(2*sizeof(int)+sizeof(float)) bytes.
 */
CvDTreeSplit*
CvBoostTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
    const double* weights = ensemble->get_subtree_weights()->data.db;
    int ci = data->get_var_type(vi);
    int n = node->sample_count;
    int mi = data->cat_count->data.i[ci];
    // base part: per-category sums & counts (incl. a slot for "missing"
    // at index -1) + alignment slack + mi pointers for indirect sorting
    int base_size = (2*mi+3)*sizeof(double) + mi*sizeof(double*);
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + n*(2*sizeof(int) + sizeof(float)));
    uchar* base_buf = inn_buf.data();
    uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;

    int* cat_labels_buf = (int*)ext_buf;
    const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);
    float* responses_buf = (float*)(cat_labels_buf + n);
    int* sample_indices_buf = (int*)(responses_buf + n);
    const float* responses = data->get_ord_responses(node, responses_buf, sample_indices_buf);

    // sum[-1]/counts[-1] collect the contribution of missing values
    double* sum = (double*)cv::alignPtr(base_buf,sizeof(double)) + 1;
    double* counts = sum + mi + 1;
    double** sum_ptr = (double**)(counts + mi);
    double L = 0, R = 0, best_val = init_quality, lsum = 0, rsum = 0;
    int i, best_subset = -1, subset_i;

    for( i = -1; i < mi; i++ )
        sum[i] = counts[i] = 0;

    // calculate sum response and weight of each category of the input var
    for( i = 0; i < n; i++ )
    {
        // 65535 in a 16-bit buffer marks a missing category -> slot -1
        int idx = ((cat_labels[i] == 65535) && data->is_buf_16u) ? -1 : cat_labels[i];
        double w = weights[i];
        double s = sum[idx] + responses[i]*w;
        double nc = counts[idx] + w;
        sum[idx] = s;
        counts[idx] = nc;
    }

    // calculate average response in each category
    for( i = 0; i < mi; i++ )
    {
        R += counts[i];
        rsum += sum[i];
        sum[i] = fabs(counts[i]) > DBL_EPSILON ? sum[i]/counts[i] : 0;
        sum_ptr[i] = sum + i;
    }

    // order categories by mean response (indirectly, via pointers)
    std::sort(sum_ptr, sum_ptr + mi, LessThanPtr<double>());

    // revert back to unnormalized sums
    // (there should be a very little loss in accuracy)
    for( i = 0; i < mi; i++ )
        sum[i] *= counts[i];

    // grow the left subset one category at a time in the sorted order
    for( subset_i = 0; subset_i < mi-1; subset_i++ )
    {
        int idx = (int)(sum_ptr[subset_i] - sum);
        double ni = counts[idx];

        if( ni > FLT_EPSILON )
        {
            double s = sum[idx];
            lsum += s; L += ni;
            rsum -= s; R -= ni;

            if( L > FLT_EPSILON && R > FLT_EPSILON )
            {
                double val = (lsum*lsum*R + rsum*rsum*L)/(L*R);
                if( best_val < val )
                {
                    best_val = val;
                    best_subset = subset_i;
                }
            }
        }
    }

    CvDTreeSplit* split = 0;
    if( best_subset >= 0 )
    {
        split = _split ? _split : data->new_split_cat( 0, -1.0f);
        split->var_idx = vi;
        split->quality = (float)best_val;
        memset( split->subset, 0, (data->max_c_count + 31)/32 * sizeof(int));
        // mark every category of the winning prefix in the subset bit mask
        for( i = 0; i <= best_subset; i++ )
        {
            int idx = (int)(sum_ptr[i] - sum);
            split->subset[idx >> 5] |= 1 << (idx & 31);
        }
    }
    return split;
}
645
646
647
/*
 * Find a surrogate split on ordered variable vi that best mimics the
 * directions already assigned by the primary split (data->direction).
 * A surrogate is accepted only if its agreement weight exceeds
 * node->maxlr — i.e. it must beat the trivial "send everything to the
 * majority side" rule. Returns the surrogate split or 0.
 * _ext_buf, if given, must hold n*(2*sizeof(int)+sizeof(float)) bytes.
 */
CvDTreeSplit*
CvBoostTree::find_surrogate_split_ord( CvDTreeNode* node, int vi, uchar* _ext_buf )
{
    const float epsilon = FLT_EPSILON*2;
    int n = node->sample_count;
    cv::AutoBuffer<uchar> inn_buf;
    if( !_ext_buf )
        inn_buf.allocate(n*(2*sizeof(int)+sizeof(float)));
    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    float* values_buf = (float*)ext_buf;
    int* indices_buf = (int*)(values_buf + n);
    int* sample_indices_buf = indices_buf + n;
    const float* values = 0;
    const int* indices = 0;
    data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices, sample_indices_buf );

    const double* weights = ensemble->get_subtree_weights()->data.db;
    const char* dir = (char*)data->direction->data.ptr;
    int n1 = node->get_num_valid(vi);
    // LL - number of samples that both the primary and the surrogate splits send to the left
    // LR - ... primary split sends to the left and the surrogate split sends to the right
    // RL - ... primary split sends to the right and the surrogate split sends to the left
    // RR - ... both send to the right
    int i, best_i = -1, best_inversed = 0;
    double best_val;
    double LL = 0, RL = 0, LR, RR;
    double worst_val = node->maxlr;
    double sum = 0, sum_abs = 0;
    best_val = worst_val;

    // total weight sent left/right by the primary split among samples
    // with a valid value of vi
    for( i = 0; i < n1; i++ )
    {
        int idx = indices[i];
        double w = weights[idx];
        int d = dir[idx];
        sum += d*w; sum_abs += (d & 1)*w;
    }

    // sum_abs = R + L; sum = R - L
    RR = (sum_abs + sum)*0.5;
    LR = (sum_abs - sum)*0.5;

    // initially all the samples are sent to the right by the surrogate split,
    // LR of them are sent to the left by primary split, and RR - to the right.
    // now iteratively compute LL, LR, RL and RR for every possible surrogate split value.
    for( i = 0; i < n1 - 1; i++ )
    {
        int idx = indices[i];
        double w = weights[idx];
        int d = dir[idx];

        if( d < 0 )
        {
            LL += w; LR -= w;
            // agreement = LL + RR for the direct surrogate
            if( LL + RR > best_val && values[i] + epsilon < values[i+1] )
            {
                best_val = LL + RR;
                best_i = i; best_inversed = 0;
            }
        }
        else if( d > 0 )
        {
            RL += w; RR -= w;
            // agreement = RL + LR for the inversed surrogate
            if( RL + LR > best_val && values[i] + epsilon < values[i+1] )
            {
                best_val = RL + LR;
                best_i = i; best_inversed = 1;
            }
        }
    }

    return best_i >= 0 && best_val > node->maxlr ? data->new_split_ord( vi,
        (values[best_i] + values[best_i+1])*0.5f, best_i,
        best_inversed, (float)best_val ) : 0;
}
722
723
724
/*
 * Find a surrogate split on categorical variable vi that best mimics the
 * directions assigned by the primary split: each category is sent to the
 * side where the majority (by boosting weight) of its samples went.
 * The resulting split is kept only if its agreement weight exceeds
 * node->maxlr; otherwise it is removed from the split heap and 0 is
 * returned.
 * _ext_buf, if given, must hold n*sizeof(int) bytes.
 */
CvDTreeSplit*
CvBoostTree::find_surrogate_split_cat( CvDTreeNode* node, int vi, uchar* _ext_buf )
{
    const char* dir = (char*)data->direction->data.ptr;
    const double* weights = ensemble->get_subtree_weights()->data.db;
    int n = node->sample_count;
    int i, mi = data->cat_count->data.i[data->get_var_type(vi)];

    int base_size = (2*mi+3)*sizeof(double);
    cv::AutoBuffer<uchar> inn_buf(base_size);
    if( !_ext_buf )
        inn_buf.allocate(base_size + n*sizeof(int));
    uchar* ext_buf = _ext_buf ? _ext_buf : inn_buf.data();
    int* cat_labels_buf = (int*)ext_buf;
    const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);

    // LL - number of samples that both the primary and the surrogate splits send to the left
    // LR - ... primary split sends to the left and the surrogate split sends to the right
    // RL - ... primary split sends to the right and the surrogate split sends to the left
    // RR - ... both send to the right
    CvDTreeSplit* split = data->new_split_cat( vi, 0 );
    double best_val = 0;
    // lc/rc have an extra slot at index -1 for samples with a missing category
    double* lc = (double*)cv::alignPtr(cat_labels_buf + n, sizeof(double)) + 1;
    double* rc = lc + mi + 1;

    for( i = -1; i < mi; i++ )
        lc[i] = rc[i] = 0;

    // 1. for each category calculate the weight of samples
    // sent to the left (lc) and to the right (rc) by the primary split
    for( i = 0; i < n; i++ )
    {
        // 65535 in a 16-bit buffer marks a missing category -> slot -1
        int idx = ((cat_labels[i] == 65535) && data->is_buf_16u) ? -1 : cat_labels[i];
        double w = weights[i];
        int d = dir[i];
        // accumulate sum = R - L in lc and sum_abs = R + L in rc first...
        double sum = lc[idx] + d*w;
        double sum_abs = rc[idx] + (d & 1)*w;
        lc[idx] = sum; rc[idx] = sum_abs;
    }

    // ...then convert (sum, sum_abs) into the actual left/right weights
    for( i = 0; i < mi; i++ )
    {
        double sum = lc[i];
        double sum_abs = rc[i];
        lc[i] = (sum_abs - sum) * 0.5;
        rc[i] = (sum_abs + sum) * 0.5;
    }

    // 2. now form the split.
    // in each category send all the samples to the same direction as majority
    for( i = 0; i < mi; i++ )
    {
        double lval = lc[i], rval = rc[i];
        if( lval > rval )
        {
            split->subset[i >> 5] |= 1 << (i & 31);
            best_val += lval;
        }
        else
            best_val += rval;
    }

    split->quality = (float)best_val;
    // reject the surrogate if it is no better than the majority-side rule
    if( split->quality <= node->maxlr )
        cvSetRemoveByPtr( data->split_heap, split ), split = 0;

    return split;
}
792
793
794
/*
 * Compute the node's value and bookkeeping for boosting.
 * Classification: class weight totals rcw[0]/rcw[1] and class counts are
 * accumulated; the node value is +/-1 for Discrete AdaBoost or half the
 * log-odds of class 1 for Real AdaBoost.
 * Regression (LogitBoost/Gentle): the value is the weighted mean response
 * and node_risk the (renormalized) weighted sum of squared errors.
 * The per-sample weights are copied into the subtree-weights buffer, with
 * the class totals stored at positions n and n+1 (read later by the
 * find_split_* functions).
 */
void
CvBoostTree::calc_node_value( CvDTreeNode* node )
{
    int i, n = node->sample_count;
    const double* weights = ensemble->get_weights()->data.db;
    cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int) + ( data->is_classifier ? sizeof(int) : sizeof(int) + sizeof(float))));
    int* labels_buf = (int*)inn_buf.data();
    const int* labels = data->get_cv_labels(node, labels_buf);
    double* subtree_weights = ensemble->get_subtree_weights()->data.db;
    double rcw[2] = {0,0};
    int boost_type = ensemble->get_params().boost_type;

    if( data->is_classifier )
    {
        int* _responses_buf = labels_buf + n;
        const int* _responses = data->get_class_labels(node, _responses_buf);
        int m = data->get_num_classes();
        int* cls_count = data->counts->data.i;
        for( int k = 0; k < m; k++ )
            cls_count[k] = 0;

        for( i = 0; i < n; i++ )
        {
            // labels[i] maps the node-local sample to its global weight
            int idx = labels[i];
            double w = weights[idx];
            int r = _responses[i];
            rcw[r] += w;
            cls_count[r]++;
            subtree_weights[i] = w;
        }

        // predicted class = the one with the larger total weight
        node->class_idx = rcw[1] > rcw[0];

        if( boost_type == CvBoost::DISCRETE )
        {
            // ignore cat_map for responses, and use {-1,1},
            // as the whole ensemble response is computes as sign(sum_i(weak_response_i)
            node->value = node->class_idx*2 - 1;
        }
        else
        {
            double p = rcw[1]/(rcw[0] + rcw[1]);
            assert( boost_type == CvBoost::REAL );

            // store log-ratio of the probability
            node->value = 0.5*log_ratio(p);
        }
    }
    else
    {
        // in case of regression tree:
        // * node value is 1/n*sum_i(Y_i), where Y_i is i-th response,
        // n is the number of samples in the node.
        // * node risk is the sum of squared errors: sum_i((Y_i - <node_value>)^2)
        double sum = 0, sum2 = 0, iw;
        float* values_buf = (float*)(labels_buf + n);
        int* sample_indices_buf = (int*)(values_buf + n);
        const float* values = data->get_ord_responses(node, values_buf, sample_indices_buf);

        for( i = 0; i < n; i++ )
        {
            int idx = labels[i];
            double w = weights[idx]/*priors[values[i] > 0]*/;
            double t = values[i];
            rcw[0] += w;
            subtree_weights[i] = w;
            sum += t*w;
            sum2 += t*t*w;
        }

        iw = 1./rcw[0];
        node->value = sum*iw;
        node->node_risk = sum2 - (sum*iw)*sum;

        // renormalize the risk, as in try_split_node the unweighted formula
        // sqrt(risk)/n is used, rather than sqrt(risk)/sum(weights_i)
        node->node_risk *= n*iw*n*iw;
    }

    // store summary weights
    subtree_weights[n] = rcw[0];
    subtree_weights[n+1] = rcw[1];
}
877
878
879
// Load a weak tree from file storage and attach it to its ensemble.
void CvBoostTree::read( CvFileStorage* fs, CvFileNode* fnode, CvBoost* _ensemble, CvDTreeTrainData* _data )
{
    CvDTree::read( fs, fnode, _data );
    ensemble = _ensemble;
}
884
885
// Reading a weak tree without its ensemble is not supported;
// use the 4-argument read() overload instead.
void CvBoostTree::read( CvFileStorage*, CvFileNode* )
{
    assert(0);
}
889
890
// Plain CvDTree deserialization; the caller is responsible for setting
// the ensemble pointer afterwards (see the 4-argument overload).
void CvBoostTree::read( CvFileStorage* _fs, CvFileNode* _node,
                        CvDTreeTrainData* _data )
{
    CvDTree::read( _fs, _node, _data );
}
895
896
897
/////////////////////////////////// CvBoost /////////////////////////////////////
898
899
// Default-construct an empty booster: all matrices/sequences are zeroed
// before clear() is called so that clear() can safely release them.
CvBoost::CvBoost()
{
    data = 0;
    weak = 0;
    default_model_name = "my_boost_tree";

    active_vars = active_vars_abs = orig_response = sum_response = weak_eval =
        subsample_mask = weights = subtree_weights = 0;
    have_active_cat_vars = have_subsample = false;

    clear();
}
911
912
913
// Delete the weak trees in the given slice of the ensemble and remove
// them from the sequence. prune(CV_WHOLE_SEQ) drops all weak learners.
void CvBoost::prune( CvSlice slice )
{
    if( weak && weak->total > 0 )
    {
        CvSeqReader reader;
        int i, count = cvSliceLength( slice, weak );

        cvStartReadSeq( weak, &reader );
        cvSetSeqReaderPos( &reader, slice.start_index );

        // the sequence stores CvBoostTree* pointers; delete each tree
        // before removing the slice from the sequence
        for( i = 0; i < count; i++ )
        {
            CvBoostTree* w;
            CV_READ_SEQ_ELEM( w, reader );
            delete w;
        }

        cvSeqRemoveSlice( weak, slice );
    }
}
933
934
935
// Release everything the booster owns: all weak trees (and their storage),
// the training data and every auxiliary matrix. Safe to call repeatedly —
// all pointers are reset to 0 / released with null-tolerant cvReleaseMat.
void CvBoost::clear()
{
    if( weak )
    {
        prune( CV_WHOLE_SEQ );
        cvReleaseMemStorage( &weak->storage );
    }
    if( data )
        delete data;
    weak = 0;
    data = 0;
    cvReleaseMat( &active_vars );
    cvReleaseMat( &active_vars_abs );
    cvReleaseMat( &orig_response );
    cvReleaseMat( &sum_response );
    cvReleaseMat( &weak_eval );
    cvReleaseMat( &subsample_mask );
    cvReleaseMat( &weights );
    cvReleaseMat( &subtree_weights );

    have_subsample = false;
}
957
958
959
// Release all owned resources (weak trees, training data, work matrices).
CvBoost::~CvBoost()
{
    clear();
}
963
964
965
// Convenience constructor: zero-initialize all members, then train
// immediately on the given data with the given parameters.
CvBoost::CvBoost( const CvMat* _train_data, int _tflag,
                  const CvMat* _responses, const CvMat* _var_idx,
                  const CvMat* _sample_idx, const CvMat* _var_type,
                  const CvMat* _missing_mask, CvBoostParams _params )
{
    weak = 0;
    data = 0;
    default_model_name = "my_boost_tree";

    // must be zeroed before train() so that clear() inside train()
    // does not release garbage pointers
    active_vars = active_vars_abs = orig_response = sum_response = weak_eval =
        subsample_mask = weights = subtree_weights = 0;

    train( _train_data, _tflag, _responses, _var_idx, _sample_idx,
           _var_type, _missing_mask, _params );
}
980
981
982
// Validate and normalize the boosting parameters:
// rejects unknown boost types, clamps weak_count and weight_trim_rate,
// and forces a split criterion compatible with the boost type
// (MISCLASS for Discrete, GINI for Real, SQERR for Logit/Gentle).
bool
CvBoost::set_params( const CvBoostParams& _params )
{
    bool ok = false;

    CV_FUNCNAME( "CvBoost::set_params" );

    __BEGIN__;

    params = _params;
    if( params.boost_type != DISCRETE && params.boost_type != REAL &&
        params.boost_type != LOGIT && params.boost_type != GENTLE )
        CV_ERROR( CV_StsBadArg, "Unknown/unsupported boosting type" );

    params.weak_count = MAX( params.weak_count, 1 );
    params.weight_trim_rate = MAX( params.weight_trim_rate, 0. );
    params.weight_trim_rate = MIN( params.weight_trim_rate, 1. );
    // a (near-)zero trim rate means "no trimming": keep all samples
    if( params.weight_trim_rate < FLT_EPSILON )
        params.weight_trim_rate = 1.f;

    if( params.boost_type == DISCRETE &&
        params.split_criteria != GINI && params.split_criteria != MISCLASS )
        params.split_criteria = MISCLASS;
    if( params.boost_type == REAL &&
        params.split_criteria != GINI && params.split_criteria != MISCLASS )
        params.split_criteria = GINI;
    if( (params.boost_type == LOGIT || params.boost_type == GENTLE) &&
        params.split_criteria != SQERR )
        params.split_criteria = SQERR;

    ok = true;

    __END__;

    return ok;
}
1018
1019
1020
/*
 * Main boosting loop. Builds (or, with _update=true, extends) the ensemble:
 * validates parameters, prepares the shared CvDTreeTrainData, then trains
 * up to params.weak_count weak trees, re-weighting and trimming samples
 * after each one. Only 2-class classification is supported.
 * Returns true if at least one weak tree was trained.
 */
bool
CvBoost::train( const CvMat* _train_data, int _tflag,
                const CvMat* _responses, const CvMat* _var_idx,
                const CvMat* _sample_idx, const CvMat* _var_type,
                const CvMat* _missing_mask,
                CvBoostParams _params, bool _update )
{
    bool ok = false;
    CvMemStorage* storage = 0;

    CV_FUNCNAME( "CvBoost::train" );

    __BEGIN__;

    int i;

    set_params( _params );

    cvReleaseMat( &active_vars );
    cvReleaseMat( &active_vars_abs );

    if( !_update || !data )
    {
        // fresh training: drop any previous model and build new train data
        clear();
        data = new CvDTreeTrainData( _train_data, _tflag, _responses, _var_idx,
            _sample_idx, _var_type, _missing_mask, _params, true, true );

        if( data->get_num_classes() != 2 )
            CV_ERROR( CV_StsNotImplemented,
            "Boosted trees can only be used for 2-class classification." );
        CV_CALL( storage = cvCreateMemStorage() );
        weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
        storage = 0;
    }
    else
    {
        // incremental update: reuse the existing weak sequence,
        // replace only the training data
        data->set_data( _train_data, _tflag, _responses, _var_idx,
            _sample_idx, _var_type, _missing_mask, _params, true, true, true );
    }

    // LogitBoost/Gentle AdaBoost modify the responses in place,
    // so they work on a private copy
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    // tree == 0 initializes the weights and auxiliary buffers
    update_weights( 0 );

    for( i = 0; i < params.weak_count; i++ )
    {
        CvBoostTree* tree = new CvBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            delete tree;
            break;
        }
        //cvCheckArr( get_weak_response());
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
        // stop early if weight trimming left no samples to train on
        if( cvCountNonZero(subsample_mask) == 0 )
            break;
    }

    if(weak->total > 0)
    {
        get_active_vars(); // recompute active_vars* maps and condensed_idx's in the splits.
        data->is_classifier = true;
        data->free_train_data();
        ok = true;
    }
    else
        clear();

    __END__;

    return ok;
}
1095
1096
// Convenience overload: unpack a CvMLData container (values, responses,
// missing mask, variable types, train subset, variable subset) and
// delegate to the main row-sample train() overload.
bool CvBoost::train( CvMLData* _data,
                     CvBoostParams _params,
                     bool update )
{
    bool result = false;

    CV_FUNCNAME( "CvBoost::train" );

    __BEGIN__;

    const CvMat* values = _data->get_values();
    const CvMat* response = _data->get_responses();
    const CvMat* missing = _data->get_missing();
    const CvMat* var_types = _data->get_var_types();
    const CvMat* train_sidx = _data->get_train_sample_idx();
    const CvMat* var_idx = _data->get_var_idx();

    CV_CALL( result = train( values, CV_ROW_SAMPLE, response, var_idx,
        train_sidx, var_types, missing, _params, update ) );

    __END__;

    return result;
}
1120
1121
// Seeds the per-class weight scale: both classes start with unit scale
// (may be overridden later from class priors in update_weights()).
void CvBoost::initialize_weights(double (&p)[2])
{
    p[1] = p[0] = 1.;
}
1126
1127
// Updates the per-sample boosting weights after a weak tree has been trained.
// Called with tree == 0 once before the first iteration to initialize the
// weights, response buffers and masks; called with the freshly trained tree
// after every iteration to reweight the samples according to the selected
// boosting flavor (Discrete/Real AdaBoost, LogitBoost, Gentle AdaBoost).
// Finishes by renormalizing the weights to sum to 1.
void
CvBoost::update_weights( CvBoostTree* tree )
{
    CV_FUNCNAME( "CvBoost::update_weights" );

    __BEGIN__;

    int i, n = data->sample_count;
    double sumw = 0.;
    int step = 0;
    float* fdata = 0;
    int *sample_idx_buf;
    const int* sample_idx = 0;
    cv::AutoBuffer<uchar> inn_buf;
    // Scratch space: LOGIT/GENTLE need a sample-index buffer; the first call
    // additionally needs room for class labels; later calls (with subsampling)
    // need room for the vectors of the samples skipped by the last tree.
    size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? (size_t)(data->sample_count)*sizeof(int) : 0;
    if( !tree )
        _buf_size += n*sizeof(int);
    else
    {
        if( have_subsample )
            _buf_size += data->get_length_subbuf()*(sizeof(float)+sizeof(uchar));
    }
    inn_buf.allocate(_buf_size);
    uchar* cur_buf_pos = inn_buf.data();

    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        // These variants rewrite the (copied) responses in place, so locate
        // the response storage and the mapping from sample order to storage.
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sample_idx_buf = (int*)cur_buf_pos;
        cur_buf_pos = (uchar*)(sample_idx_buf + data->sample_count);
        sample_idx = data->get_sample_indices( data->data_root, sample_idx_buf );
    }
    CvMat* dtree_data_buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* class_labels_buf = (int*)cur_buf_pos;
        cur_buf_pos = (uchar*)(class_labels_buf + n);
        const int* class_labels = data->get_class_labels(data->data_root, class_labels_buf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values

        double w0 = 1./ n;
        double p[2] = { 1., 1. };
        initialize_weights(p);

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );
        cvReleaseMat( &subtree_weights );

        CV_CALL( orig_response = cvCreateMat( 1, n, CV_32S ));
        CV_CALL( weak_eval = cvCreateMat( 1, n, CV_64F ));
        CV_CALL( subsample_mask = cvCreateMat( 1, n, CV_8U ));
        CV_CALL( weights = cvCreateMat( 1, n, CV_64F ));
        CV_CALL( subtree_weights = cvCreateMat( 1, n + 2, CV_64F ));

        if( data->have_priors )
        {
            // compute weight scale for each class from their prior probabilities
            int c1 = 0;
            for( i = 0; i < n; i++ )
                c1 += class_labels[i];
            p[0] = data->priors->data.db[0]*(c1 < n ? 1./(n - c1) : 0.);
            p[1] = data->priors->data.db[1]*(c1 > 0 ? 1./c1 : 0.);
            p[0] /= p[0] + p[1];
            p[1] = 1. - p[0];
        }

        // Two branches below are identical except for the label element type
        // (16-bit vs 32-bit internal buffer).
        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
            for( i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = class_labels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[class_labels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;

            for( i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = class_labels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[class_labels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            CV_CALL( sum_response = cvCreateMat( 1, n, CV_64F ));

            for( i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sample_idx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( i = 0; i < n; i++ )
                fdata[sample_idx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            float* values = (float*)cur_buf_pos;
            cur_buf_pos = (uchar*)(values + data->get_length_subbuf());
            uchar* missing = cur_buf_pos;
            cur_buf_pos = missing + data->get_length_subbuf() * (size_t)CV_ELEM_SIZE(data->buf->type);

            CvMat _sample, _mask;

            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );
            data->get_vectors( subsample_mask, values, missing, 0 );

            _sample = cvMat( 1, data->var_count, CV_32F );
            _mask = cvMat( 1, data->var_count, CV_8U );

            // run tree through all the non-processed samples
            for( i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    _sample.data.fl = values;
                    _mask.data.ptr = missing;
                    values += _sample.cols;
                    missing += _mask.cols;
                    weak_eval->data.db[i] = tree->predict( &_sample, &_mask, true )->value;
                }
        }

        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))

            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumw += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumw != 0 )
                err /= sumw;
            C = err = -log_ratio( err );
            scale[1] = exp(err);

            sumw = 0;
            for( i = 0; i < n; i++ )
            {
                // misclassified samples get scaled up by exp(C), the rest stay
                double w = weights->data.db[i]*
                    scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumw += w;
                weights->data.db[i] = w;
            }

            tree->scale( C );
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))

            for( i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumw += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response = F(x_i).
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*1(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses

            const double lb_weight_thresh = FLT_EPSILON;
            const double lb_z_max = 10.;

            for( i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                // clamp the weight away from zero to keep the regression stable
                w = MAX( w, lb_weight_thresh );
                weights->data.db[i] = w;
                sumw += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sample_idx[i]*step] = (float)MIN(z, lb_z_max);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sample_idx[i]*step] = (float)-MIN(z, lb_z_max);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumw += w;
            }
        }
    }

    // renormalize weights
    if( sumw > FLT_EPSILON )
    {
        sumw = 1./sumw;
        for( i = 0; i < n; ++i )
            weights->data.db[i] *= sumw;
    }

    __END__;
}
1425
1426
1427
void
1428
CvBoost::trim_weights()
1429
{
1430
//CV_FUNCNAME( "CvBoost::trim_weights" );
1431
1432
__BEGIN__;
1433
1434
int i, count = data->sample_count, nz_count = 0;
1435
double sum, threshold;
1436
1437
if( params.weight_trim_rate <= 0. || params.weight_trim_rate >= 1. )
1438
EXIT;
1439
1440
// use weak_eval as temporary buffer for sorted weights
1441
cvCopy( weights, weak_eval );
1442
1443
std::sort(weak_eval->data.db, weak_eval->data.db + count);
1444
1445
// as weight trimming occurs immediately after updating the weights,
1446
// where they are renormalized, we assume that the weight sum = 1.
1447
sum = 1. - params.weight_trim_rate;
1448
1449
for( i = 0; i < count; i++ )
1450
{
1451
double w = weak_eval->data.db[i];
1452
if( sum <= 0 )
1453
break;
1454
sum -= w;
1455
}
1456
1457
threshold = i < count ? weak_eval->data.db[i] : DBL_MAX;
1458
1459
for( i = 0; i < count; i++ )
1460
{
1461
double w = weights->data.db[i];
1462
int f = w >= threshold;
1463
subsample_mask->data.ptr[i] = (uchar)f;
1464
nz_count += f;
1465
}
1466
1467
have_subsample = nz_count < count;
1468
1469
__END__;
1470
}
1471
1472
1473
// Returns the list of variables actually used by the trained ensemble,
// either as indices into the training variable subset (absolute_idx=false)
// or as original/absolute variable indices (absolute_idx=true).
// On the first call it computes both maps by walking every split of every
// weak tree, and rewrites each split's condensed_idx to index into the
// condensed active-variable list. Subsequent calls return the cached mats.
const CvMat*
CvBoost::get_active_vars( bool absolute_idx )
{
    CvMat* mask = 0;
    CvMat* inv_map = 0;
    CvMat* result = 0;

    CV_FUNCNAME( "CvBoost::get_active_vars" );

    __BEGIN__;

    if( !weak )
        CV_ERROR( CV_StsError, "The boosted tree ensemble has not been trained yet" );

    if( !active_vars || !active_vars_abs )
    {
        CvSeqReader reader;
        int i, j, nactive_vars;
        CvBoostTree* wtree;
        const CvDTreeNode* node;

        assert(!active_vars && !active_vars_abs);
        // mask[v] = 1 iff variable v appears in some split;
        // inv_map[v] = condensed index of v, or -1 if unused.
        mask = cvCreateMat( 1, data->var_count, CV_8U );
        inv_map = cvCreateMat( 1, data->var_count, CV_32S );
        cvZero( mask );
        cvSet( inv_map, cvScalar(-1) );

        // first pass: compute the mask of used variables
        cvStartReadSeq( weak, &reader );
        for( i = 0; i < weak->total; i++ )
        {
            CV_READ_SEQ_ELEM(wtree, reader);

            // iterative depth-first traversal (leftmost descent, then
            // backtrack to the first unvisited right subtree)
            node = wtree->get_root();
            assert( node != 0 );
            for(;;)
            {
                const CvDTreeNode* parent;
                for(;;)
                {
                    CvDTreeSplit* split = node->split;
                    // mark the primary split and all its surrogates
                    for( ; split != 0; split = split->next )
                        mask->data.ptr[split->var_idx] = 1;
                    if( !node->left )
                        break;
                    node = node->left;
                }

                for( parent = node->parent; parent && parent->right == node;
                    node = parent, parent = parent->parent )
                    ;

                if( !parent )
                    break;

                node = parent->right;
            }
        }

        nactive_vars = cvCountNonZero(mask);

        //if ( nactive_vars > 0 )
        {
            active_vars = cvCreateMat( 1, nactive_vars, CV_32S );
            active_vars_abs = cvCreateMat( 1, nactive_vars, CV_32S );

            have_active_cat_vars = false;

            // build the condensed index <-> variable index maps
            for( i = j = 0; i < data->var_count; i++ )
            {
                if( mask->data.ptr[i] )
                {
                    active_vars->data.i[j] = i;
                    active_vars_abs->data.i[j] = data->var_idx ? data->var_idx->data.i[i] : i;
                    inv_map->data.i[i] = j;
                    // var_type >= 0 marks a categorical variable
                    if( data->var_type->data.i[i] >= 0 )
                        have_active_cat_vars = true;
                    j++;
                }
            }


            // second pass: now compute the condensed indices
            cvStartReadSeq( weak, &reader );
            for( i = 0; i < weak->total; i++ )
            {
                CV_READ_SEQ_ELEM(wtree, reader);
                node = wtree->get_root();
                for(;;)
                {
                    const CvDTreeNode* parent;
                    for(;;)
                    {
                        CvDTreeSplit* split = node->split;
                        for( ; split != 0; split = split->next )
                        {
                            split->condensed_idx = inv_map->data.i[split->var_idx];
                            assert( split->condensed_idx >= 0 );
                        }

                        if( !node->left )
                            break;
                        node = node->left;
                    }

                    for( parent = node->parent; parent && parent->right == node;
                        node = parent, parent = parent->parent )
                        ;

                    if( !parent )
                        break;

                    node = parent->right;
                }
            }
        }
    }

    result = absolute_idx ? active_vars_abs : active_vars;

    __END__;

    cvReleaseMat( &mask );
    cvReleaseMat( &inv_map );

    return result;
}
1600
1601
1602
float
1603
CvBoost::predict( const CvMat* _sample, const CvMat* _missing,
1604
CvMat* weak_responses, CvSlice slice,
1605
bool raw_mode, bool return_sum ) const
1606
{
1607
float value = -FLT_MAX;
1608
1609
CvSeqReader reader;
1610
double sum = 0;
1611
int wstep = 0;
1612
const float* sample_data;
1613
1614
if( !weak )
1615
CV_Error( CV_StsError, "The boosted tree ensemble has not been trained yet" );
1616
1617
if( !CV_IS_MAT(_sample) || CV_MAT_TYPE(_sample->type) != CV_32FC1 ||
1618
(_sample->cols != 1 && _sample->rows != 1) ||
1619
(_sample->cols + _sample->rows - 1 != data->var_all && !raw_mode) ||
1620
(active_vars && _sample->cols + _sample->rows - 1 != active_vars->cols && raw_mode) )
1621
CV_Error( CV_StsBadArg,
1622
"the input sample must be 1d floating-point vector with the same "
1623
"number of elements as the total number of variables or "
1624
"as the number of variables used for training" );
1625
1626
if( _missing )
1627
{
1628
if( !CV_IS_MAT(_missing) || !CV_IS_MASK_ARR(_missing) ||
1629
!CV_ARE_SIZES_EQ(_missing, _sample) )
1630
CV_Error( CV_StsBadArg,
1631
"the missing data mask must be 8-bit vector of the same size as input sample" );
1632
}
1633
1634
int i, weak_count = cvSliceLength( slice, weak );
1635
if( weak_count >= weak->total )
1636
{
1637
weak_count = weak->total;
1638
slice.start_index = 0;
1639
}
1640
1641
if( weak_responses )
1642
{
1643
if( !CV_IS_MAT(weak_responses) ||
1644
CV_MAT_TYPE(weak_responses->type) != CV_32FC1 ||
1645
(weak_responses->cols != 1 && weak_responses->rows != 1) ||
1646
weak_responses->cols + weak_responses->rows - 1 != weak_count )
1647
CV_Error( CV_StsBadArg,
1648
"The output matrix of weak classifier responses must be valid "
1649
"floating-point vector of the same number of components as the length of input slice" );
1650
wstep = CV_IS_MAT_CONT(weak_responses->type) ? 1 : weak_responses->step/sizeof(float);
1651
}
1652
1653
int var_count = active_vars->cols;
1654
const int* vtype = data->var_type->data.i;
1655
const int* cmap = data->cat_map->data.i;
1656
const int* cofs = data->cat_ofs->data.i;
1657
1658
cv::Mat sample = cv::cvarrToMat(_sample);
1659
cv::Mat missing;
1660
if(!_missing)
1661
missing = cv::cvarrToMat(_missing);
1662
1663
// if need, preprocess the input vector
1664
if( !raw_mode )
1665
{
1666
int sstep, mstep = 0;
1667
const float* src_sample;
1668
const uchar* src_mask = 0;
1669
float* dst_sample;
1670
uchar* dst_mask;
1671
const int* vidx = active_vars->data.i;
1672
const int* vidx_abs = active_vars_abs->data.i;
1673
bool have_mask = _missing != 0;
1674
1675
sample = cv::Mat(1, var_count, CV_32FC1);
1676
missing = cv::Mat(1, var_count, CV_8UC1);
1677
1678
dst_sample = sample.ptr<float>();
1679
dst_mask = missing.ptr<uchar>();
1680
1681
src_sample = _sample->data.fl;
1682
sstep = CV_IS_MAT_CONT(_sample->type) ? 1 : _sample->step/sizeof(src_sample[0]);
1683
1684
if( _missing )
1685
{
1686
src_mask = _missing->data.ptr;
1687
mstep = CV_IS_MAT_CONT(_missing->type) ? 1 : _missing->step;
1688
}
1689
1690
for( i = 0; i < var_count; i++ )
1691
{
1692
int idx = vidx[i], idx_abs = vidx_abs[i];
1693
float val = src_sample[idx_abs*sstep];
1694
int ci = vtype[idx];
1695
uchar m = src_mask ? src_mask[idx_abs*mstep] : (uchar)0;
1696
1697
if( ci >= 0 )
1698
{
1699
int a = cofs[ci], b = (ci+1 >= data->cat_ofs->cols) ? data->cat_map->cols : cofs[ci+1],
1700
c = a;
1701
int ival = cvRound(val);
1702
if ( (ival != val) && (!m) )
1703
CV_Error( CV_StsBadArg,
1704
"one of input categorical variable is not an integer" );
1705
1706
while( a < b )
1707
{
1708
c = (a + b) >> 1;
1709
if( ival < cmap[c] )
1710
b = c;
1711
else if( ival > cmap[c] )
1712
a = c+1;
1713
else
1714
break;
1715
}
1716
1717
if( c < 0 || ival != cmap[c] )
1718
{
1719
m = 1;
1720
have_mask = true;
1721
}
1722
else
1723
{
1724
val = (float)(c - cofs[ci]);
1725
}
1726
}
1727
1728
dst_sample[i] = val;
1729
dst_mask[i] = m;
1730
}
1731
1732
if( !have_mask )
1733
missing.release();
1734
}
1735
else
1736
{
1737
if( !CV_IS_MAT_CONT(_sample->type & (_missing ? _missing->type : -1)) )
1738
CV_Error( CV_StsBadArg, "In raw mode the input vectors must be continuous" );
1739
}
1740
1741
cvStartReadSeq( weak, &reader );
1742
cvSetSeqReaderPos( &reader, slice.start_index );
1743
1744
sample_data = sample.ptr<float>();
1745
1746
if( !have_active_cat_vars && missing.empty() && !weak_responses )
1747
{
1748
for( i = 0; i < weak_count; i++ )
1749
{
1750
CvBoostTree* wtree;
1751
const CvDTreeNode* node;
1752
CV_READ_SEQ_ELEM( wtree, reader );
1753
1754
node = wtree->get_root();
1755
while( node->left )
1756
{
1757
CvDTreeSplit* split = node->split;
1758
int vi = split->condensed_idx;
1759
float val = sample_data[vi];
1760
int dir = val <= split->ord.c ? -1 : 1;
1761
if( split->inversed )
1762
dir = -dir;
1763
node = dir < 0 ? node->left : node->right;
1764
}
1765
sum += node->value;
1766
}
1767
}
1768
else
1769
{
1770
const int* avars = active_vars->data.i;
1771
const uchar* m = !missing.empty() ? missing.ptr<uchar>() : 0;
1772
1773
// full-featured version
1774
for( i = 0; i < weak_count; i++ )
1775
{
1776
CvBoostTree* wtree;
1777
const CvDTreeNode* node;
1778
CV_READ_SEQ_ELEM( wtree, reader );
1779
1780
node = wtree->get_root();
1781
while( node->left )
1782
{
1783
const CvDTreeSplit* split = node->split;
1784
int dir = 0;
1785
for( ; !dir && split != 0; split = split->next )
1786
{
1787
int vi = split->condensed_idx;
1788
int ci = vtype[avars[vi]];
1789
float val = sample_data[vi];
1790
if( m && m[vi] )
1791
continue;
1792
if( ci < 0 ) // ordered
1793
dir = val <= split->ord.c ? -1 : 1;
1794
else // categorical
1795
{
1796
int c = cvRound(val);
1797
dir = CV_DTREE_CAT_DIR(c, split->subset);
1798
}
1799
if( split->inversed )
1800
dir = -dir;
1801
}
1802
1803
if( !dir )
1804
{
1805
int diff = node->right->sample_count - node->left->sample_count;
1806
dir = diff < 0 ? -1 : 1;
1807
}
1808
node = dir < 0 ? node->left : node->right;
1809
}
1810
if( weak_responses )
1811
weak_responses->data.fl[i*wstep] = (float)node->value;
1812
sum += node->value;
1813
}
1814
}
1815
1816
if( return_sum )
1817
value = (float)sum;
1818
else
1819
{
1820
int cls_idx = sum >= 0;
1821
if( raw_mode )
1822
value = (float)cls_idx;
1823
else
1824
value = (float)cmap[cofs[vtype[data->var_count]] + cls_idx];
1825
}
1826
1827
return value;
1828
}
1829
1830
float CvBoost::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
1831
{
1832
float err = 0;
1833
const CvMat* values = _data->get_values();
1834
const CvMat* response = _data->get_responses();
1835
const CvMat* missing = _data->get_missing();
1836
const CvMat* sample_idx = (type == CV_TEST_ERROR) ? _data->get_test_sample_idx() : _data->get_train_sample_idx();
1837
const CvMat* var_types = _data->get_var_types();
1838
int* sidx = sample_idx ? sample_idx->data.i : 0;
1839
int r_step = CV_IS_MAT_CONT(response->type) ?
1840
1 : response->step / CV_ELEM_SIZE(response->type);
1841
bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
1842
int sample_count = sample_idx ? sample_idx->cols : 0;
1843
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? values->rows : sample_count;
1844
float* pred_resp = 0;
1845
if( resp && (sample_count > 0) )
1846
{
1847
resp->resize( sample_count );
1848
pred_resp = &((*resp)[0]);
1849
}
1850
if ( is_classifier )
1851
{
1852
for( int i = 0; i < sample_count; i++ )
1853
{
1854
CvMat sample, miss;
1855
int si = sidx ? sidx[i] : i;
1856
cvGetRow( values, &sample, si );
1857
if( missing )
1858
cvGetRow( missing, &miss, si );
1859
float r = (float)predict( &sample, missing ? &miss : 0 );
1860
if( pred_resp )
1861
pred_resp[i] = r;
1862
int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
1863
err += d;
1864
}
1865
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
1866
}
1867
else
1868
{
1869
for( int i = 0; i < sample_count; i++ )
1870
{
1871
CvMat sample, miss;
1872
int si = sidx ? sidx[i] : i;
1873
cvGetRow( values, &sample, si );
1874
if( missing )
1875
cvGetRow( missing, &miss, si );
1876
float r = (float)predict( &sample, missing ? &miss : 0 );
1877
if( pred_resp )
1878
pred_resp[i] = r;
1879
float d = r - response->data.fl[si*r_step];
1880
err += d*d;
1881
}
1882
err = sample_count ? err / (float)sample_count : -FLT_MAX;
1883
}
1884
return err;
1885
}
1886
1887
// Writes the boosting parameters (boosting type, splitting criterion, tree
// count, weight trimming rate) plus the shared train-data parameters to fs.
// Known enum values are written as strings, unknown ones as raw integers.
void CvBoost::write_params( CvFileStorage* fs ) const
{
    const char* boost_type_str =
        params.boost_type == DISCRETE ? "DiscreteAdaboost" :
        params.boost_type == REAL ? "RealAdaboost" :
        params.boost_type == LOGIT ? "LogitBoost" :
        params.boost_type == GENTLE ? "GentleAdaboost" : 0;

    // BUGFIX: the MISCLASS/SQERR cases compared params.boost_type instead of
    // params.split_criteria (copy-paste), so those criteria were never
    // serialized correctly as strings.
    const char* split_crit_str =
        params.split_criteria == DEFAULT ? "Default" :
        params.split_criteria == GINI ? "Gini" :
        params.split_criteria == MISCLASS ? "Misclassification" :
        params.split_criteria == SQERR ? "SquaredErr" : 0;

    if( boost_type_str )
        cvWriteString( fs, "boosting_type", boost_type_str );
    else
        cvWriteInt( fs, "boosting_type", params.boost_type );

    if( split_crit_str )
        cvWriteString( fs, "splitting_criteria", split_crit_str );
    else
        cvWriteInt( fs, "splitting_criteria", params.split_criteria );

    cvWriteInt( fs, "ntrees", weak->total );
    cvWriteReal( fs, "weight_trimming_rate", params.weight_trim_rate );

    data->write_params( fs );
}
1916
1917
1918
// Reads the boosting parameters back from storage: recreates the shared
// train-data object, copies the tree parameters from it, then parses the
// boosting type and splitting criterion (string or integer form) and the
// remaining scalar parameters. Returns silently if fnode is not a map.
void CvBoost::read_params( CvFileStorage* fs, CvFileNode* fnode )
{
    CV_FUNCNAME( "CvBoost::read_params" );

    __BEGIN__;

    CvFileNode* temp;

    if( !fnode || !CV_NODE_IS_MAP(fnode->tag) )
        return;

    data = new CvDTreeTrainData();
    CV_CALL( data->read_params(fs, fnode));
    data->shared = true;

    // mirror the per-tree parameters read by the train data object
    params.max_depth = data->params.max_depth;
    params.min_sample_count = data->params.min_sample_count;
    params.max_categories = data->params.max_categories;
    params.priors = data->params.priors;
    params.regression_accuracy = data->params.regression_accuracy;
    params.use_surrogates = data->params.use_surrogates;

    temp = cvGetFileNodeByName( fs, fnode, "boosting_type" );
    if( !temp )
        return;

    if( CV_NODE_IS_STRING(temp->tag) )
    {
        const char* boost_type_str = cvReadString( temp, "" );
        params.boost_type = strcmp( boost_type_str, "DiscreteAdaboost" ) == 0 ? DISCRETE :
                            strcmp( boost_type_str, "RealAdaboost" ) == 0 ? REAL :
                            strcmp( boost_type_str, "LogitBoost" ) == 0 ? LOGIT :
                            strcmp( boost_type_str, "GentleAdaboost" ) == 0 ? GENTLE : -1;
    }
    else
        params.boost_type = cvReadInt( temp, -1 );

    if( params.boost_type < DISCRETE || params.boost_type > GENTLE )
        CV_ERROR( CV_StsBadArg, "Unknown boosting type" );

    temp = cvGetFileNodeByName( fs, fnode, "splitting_criteria" );
    if( temp && CV_NODE_IS_STRING(temp->tag) )
    {
        const char* split_crit_str = cvReadString( temp, "" );
        params.split_criteria = strcmp( split_crit_str, "Default" ) == 0 ? DEFAULT :
                                strcmp( split_crit_str, "Gini" ) == 0 ? GINI :
                                strcmp( split_crit_str, "Misclassification" ) == 0 ? MISCLASS :
                                strcmp( split_crit_str, "SquaredErr" ) == 0 ? SQERR : -1;
    }
    else
        params.split_criteria = cvReadInt( temp, -1 );

    // BUGFIX: the upper-bound check tested params.boost_type instead of
    // params.split_criteria, and reported the wrong error message, so an
    // invalid splitting criterion could slip through validation.
    if( params.split_criteria < DEFAULT || params.split_criteria > SQERR )
        CV_ERROR( CV_StsBadArg, "Unknown splitting criteria" );

    params.weak_count = cvReadIntByName( fs, fnode, "ntrees" );
    params.weight_trim_rate = cvReadRealByName( fs, fnode, "weight_trimming_rate", 0. );

    __END__;
}
1978
1979
1980
1981
// Restores a trained CvBoost from file storage: clears the current state,
// reads the parameters, then deserializes every weak tree listed under the
// <trees> node into the 'weak' sequence and rebuilds the active-variable
// maps via get_active_vars().
void
CvBoost::read( CvFileStorage* fs, CvFileNode* node )
{
    CV_FUNCNAME( "CvBoost::read" );

    __BEGIN__;

    CvSeqReader reader;
    CvFileNode* trees_fnode;
    CvMemStorage* storage;
    int i, ntrees;

    clear();
    read_params( fs, node );

    // read_params leaves data == 0 when the node is absent/invalid
    if( !data )
        EXIT;

    trees_fnode = cvGetFileNodeByName( fs, node, "trees" );
    if( !trees_fnode || !CV_NODE_IS_SEQ(trees_fnode->tag) )
        CV_ERROR( CV_StsParseError, "<trees> tag is missing" );

    cvStartReadSeq( trees_fnode->data.seq, &reader );
    ntrees = trees_fnode->data.seq->total;

    // the stored tree count must agree with the <ntrees> parameter
    if( ntrees != params.weak_count )
        CV_ERROR( CV_StsUnmatchedSizes,
        "The number of trees stored does not match <ntrees> tag value" );

    CV_CALL( storage = cvCreateMemStorage() );
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );

    for( i = 0; i < ntrees; i++ )
    {
        CvBoostTree* tree = new CvBoostTree();
        CV_CALL(tree->read( fs, (CvFileNode*)reader.ptr, this, data ));
        CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
        cvSeqPush( weak, &tree );
    }
    // recompute active_vars/active_vars_abs and condensed split indices
    get_active_vars();

    __END__;
}
2024
2025
2026
// Serializes the trained ensemble under 'name': writes the boosting
// parameters, then each weak tree as a map inside a "trees" sequence.
// Fails with CV_StsBadArg if the classifier has not been trained.
void
CvBoost::write( CvFileStorage* fs, const char* name ) const
{
    CV_FUNCNAME( "CvBoost::write" );

    __BEGIN__;

    CvSeqReader reader;
    int i;

    cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_BOOSTING );

    if( !weak )
        CV_ERROR( CV_StsBadArg, "The classifier has not been trained yet" );

    write_params( fs );
    cvStartWriteStruct( fs, "trees", CV_NODE_SEQ );

    cvStartReadSeq( weak, &reader );

    for( i = 0; i < weak->total; i++ )
    {
        CvBoostTree* tree;
        CV_READ_SEQ_ELEM( tree, reader );
        // each tree is an anonymous map element of the "trees" sequence
        cvStartWriteStruct( fs, 0, CV_NODE_MAP );
        tree->write( fs );
        cvEndWriteStruct( fs );
    }

    cvEndWriteStruct( fs );
    cvEndWriteStruct( fs );

    __END__;
}
2060
2061
2062
// Accessor for the per-sample boosting weights (1 x sample_count, CV_64F;
// 0 until training allocates them in update_weights()).
CvMat*
CvBoost::get_weights()
{
    return weights;
}
2067
2068
2069
// Accessor for the subtree weight buffer (1 x (sample_count+2), CV_64F;
// 0 until training allocates it in update_weights()).
CvMat*
CvBoost::get_subtree_weights()
{
    return subtree_weights;
}
2074
2075
2076
// Accessor for the last weak-tree responses per sample (1 x sample_count,
// CV_64F; 0 until training allocates it in update_weights()).
CvMat*
CvBoost::get_weak_response()
{
    return weak_eval;
}
2081
2082
2083
// Read-only accessor for the current boosting parameters.
const CvBoostParams&
CvBoost::get_params() const
{
    return params;
}
2088
2089
// Accessor for the sequence of trained weak trees (elements are
// CvBoostTree*; 0 before training).
CvSeq* CvBoost::get_weak_predictors()
{
    return weak;
}
2093
2094
// Read-only accessor for the shared decision-tree training data object.
const CvDTreeTrainData* CvBoost::get_data() const
{
    return data;
}
2098
2099
using namespace cv;
2100
2101
// Convenience constructor for the cv::Mat interface: zero-initializes all
// owned state (so clear()/train() see a clean object), then immediately
// trains on the given data with the given parameters.
CvBoost::CvBoost( const Mat& _train_data, int _tflag,
               const Mat& _responses, const Mat& _var_idx,
               const Mat& _sample_idx, const Mat& _var_type,
               const Mat& _missing_mask,
               CvBoostParams _params )
{
    weak = 0;
    data = 0;
    default_model_name = "my_boost_tree";
    // all lazily-allocated matrices start null; train() creates them
    active_vars = active_vars_abs = orig_response = sum_response = weak_eval =
        subsample_mask = weights = subtree_weights = 0;

    train( _train_data, _tflag, _responses, _var_idx, _sample_idx,
          _var_type, _missing_mask, _params );
}
2116
2117
2118
bool
2119
CvBoost::train( const Mat& _train_data, int _tflag,
2120
const Mat& _responses, const Mat& _var_idx,
2121
const Mat& _sample_idx, const Mat& _var_type,
2122
const Mat& _missing_mask,
2123
CvBoostParams _params, bool _update )
2124
{
2125
train_data_hdr = cvMat(_train_data);
2126
train_data_mat = _train_data;
2127
responses_hdr = cvMat(_responses);
2128
responses_mat = _responses;
2129
2130
CvMat vidx = cvMat(_var_idx), sidx = cvMat(_sample_idx), vtype = cvMat(_var_type), mmask = cvMat(_missing_mask);
2131
2132
return train(&train_data_hdr, _tflag, &responses_hdr, vidx.data.ptr ? &vidx : 0,
2133
sidx.data.ptr ? &sidx : 0, vtype.data.ptr ? &vtype : 0,
2134
mmask.data.ptr ? &mmask : 0, _params, _update);
2135
}
2136
2137
float
2138
CvBoost::predict( const Mat& _sample, const Mat& _missing,
2139
const Range& slice, bool raw_mode, bool return_sum ) const
2140
{
2141
CvMat sample = cvMat(_sample), mmask = cvMat(_missing);
2142
/*if( weak_responses )
2143
{
2144
int weak_count = cvSliceLength( slice, weak );
2145
if( weak_count >= weak->total )
2146
{
2147
weak_count = weak->total;
2148
slice.start_index = 0;
2149
}
2150
2151
if( !(weak_responses->data && weak_responses->type() == CV_32FC1 &&
2152
(weak_responses->cols == 1 || weak_responses->rows == 1) &&
2153
weak_responses->cols + weak_responses->rows - 1 == weak_count) )
2154
weak_responses->create(weak_count, 1, CV_32FC1);
2155
pwr = &(wr = *weak_responses);
2156
}*/
2157
return predict(&sample, _missing.empty() ? 0 : &mmask, 0,
2158
slice == Range::all() ? CV_WHOLE_SEQ : cvSlice(slice.start, slice.end),
2159
raw_mode, return_sum);
2160
}
2161
2162
/* End of file. */
2163
2164