Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/ml/test/test_emknearestkmeans.cpp
16339 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// Intel License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
15
//
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
18
//
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
21
//
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
25
//
26
// * The name of Intel Corporation may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
28
//
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
39
//
40
//M*/
41
42
#include "test_precomp.hpp"
43
44
namespace opencv_test { namespace {
45
46
using cv::ml::TrainData;
47
using cv::ml::EM;
48
using cv::ml::KNearest;
49
50
// Fills `means` (3x2) and `covs` (three 2x2 matrices) with the fixed parameters of the
// three 2-D distributions used by the tests in this file. The values are stored as
// float and converted to `type` on output.
void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 )
{
    CV_TRACE_FUNCTION();

    const int clusterCount = 3;
    float meanValues[clusterCount][2] = {
        { 0.0f, 0.0f },
        { 5.0f, 0.0f },
        { 1.0f, 5.0f }
    };
    float covValues[clusterCount][4] = {
        { 0.67f, 0.0f, 0.0f, 0.67f },
        { 1.0f,  0.0f, 0.0f, 1.0f  },
        { 1.0f,  0.0f, 0.0f, 1.0f  }
    };

    means.create( clusterCount, 2, type );
    means.resize( clusterCount );
    covs.resize( clusterCount );

    for( int k = 0; k < clusterCount; k++ )
    {
        // Wrap the raw float tables without copying, then convert into the outputs.
        Mat srcMean( 1, 2, CV_32FC1, meanValues[k] );
        Mat srcCov( 2, 2, CV_32FC1, covValues[k] );

        Mat dstMean = means.row( k );
        srcMean.convertTo( dstMean, type );
        srcCov.convertTo( covs[k], type );
    }
}
74
75
// Generate point sets drawn from normal distributions (one set per mean/covariance pair).
76
// Fills the pre-allocated `data` matrix with samples for sizes.size() groups and writes
// the group index of every row into `labels` (created here as data.rows x 1 of
// `labelType`).
//
//   data      - non-empty, of type `dataType`, with exactly sum(sizes) rows
//   sizes     - number of rows belonging to each group, in row order
//   _means    - one row per group; covs - one matrix per group
//   labelType - CV_32FC1 or CV_32SC1 (anything else trips CV_DbgAssert)
//
// Every row r is first filled by randn() and then replaced by r * cov + mean of its group.
void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int dataType, int labelType )
{
    CV_TRACE_FUNCTION();

    int total = 0;
    for( size_t k = 0; k < sizes.size(); k++ )
        total += sizes[k];

    CV_Assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() );
    CV_Assert( !data.empty() && data.rows == total );
    CV_Assert( data.type() == dataType );

    labels.create( data.rows, 1, labelType );

    randn( data, Scalar::all(-1.0), Scalar::all(1.0) );

    // Split the means matrix into per-group row headers.
    vector<Mat> means(sizes.size());
    for( int k = 0; k < _means.rows; k++ )
        means[k] = _means.row(k);

    int rowEnd = 0;
    for( size_t k = 0; k < sizes.size(); k++ )
    {
        const Mat& groupMean = means[k];
        const Mat& groupCov = covs[k];
        const int groupLabel = (int)k;

        // Rows [rowBegin, rowEnd) belong to group k.
        const int rowBegin = rowEnd;
        rowEnd = rowBegin + sizes[k];

        assert( groupMean.rows == 1 && groupMean.cols == data.cols );
        assert( groupCov.rows == data.cols && groupCov.cols == data.cols );

        for( int i = rowBegin; i < rowEnd; i++ )
        {
            Mat r = data.row(i);
            r = r * groupCov + groupMean;

            if( labelType == CV_32FC1 )
            {
                labels.at<float>(i, 0) = (float)groupLabel;
            }
            else if( labelType == CV_32SC1 )
            {
                labels.at<int>(i, 0) = groupLabel;
            }
            else
            {
                CV_DbgAssert(0);
            }
        }
    }
}
117
118
// Returns the index of the largest element of `count`; on ties the lowest index wins.
// Only values greater than -1 can be selected, and an assert guards the case where
// nothing was selected (for example an empty vector).
int maxIdx( const vector<int>& count )
{
    int bestPos = -1;
    int bestVal = -1;
    const int n = (int)count.size();
    for( int pos = 0; pos < n; pos++ )
    {
        if( count[pos] <= bestVal )
            continue;
        bestVal = count[pos];
        bestPos = pos;
    }
    assert( bestPos >= 0 );
    return bestPos;
}
134
135
bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap, bool checkClusterUniq=true )
136
{
137
size_t total = 0, nclusters = sizes.size();
138
for(size_t i = 0; i < sizes.size(); i++)
139
total += sizes[i];
140
141
assert( !labels.empty() );
142
assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1));
143
assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
144
145
bool isFlt = labels.type() == CV_32FC1;
146
147
labelsMap.resize(nclusters);
148
149
vector<bool> buzy(nclusters, false);
150
int startIndex = 0;
151
for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ )
152
{
153
vector<int> count( nclusters, 0 );
154
for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++)
155
{
156
int lbl = isFlt ? (int)labels.at<float>(i) : labels.at<int>(i);
157
CV_Assert(lbl < (int)nclusters);
158
count[lbl]++;
159
CV_Assert(count[lbl] < (int)total);
160
}
161
startIndex += sizes[clusterIndex];
162
163
int cls = maxIdx( count );
164
CV_Assert( !checkClusterUniq || !buzy[cls] );
165
166
labelsMap[clusterIndex] = cls;
167
168
buzy[cls] = true;
169
}
170
171
if(checkClusterUniq)
172
{
173
for(size_t i = 0; i < buzy.size(); i++)
174
if(!buzy[i])
175
return false;
176
}
177
178
return true;
179
}
180
181
bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent = true, bool checkClusterUniq=true )
182
{
183
err = 0;
184
CV_Assert( !labels.empty() && !origLabels.empty() );
185
CV_Assert( labels.rows == 1 || labels.cols == 1 );
186
CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 );
187
CV_Assert( labels.total() == origLabels.total() );
188
CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
189
CV_Assert( origLabels.type() == labels.type() );
190
191
vector<int> labelsMap;
192
bool isFlt = labels.type() == CV_32FC1;
193
if( !labelsEquivalent )
194
{
195
if( !getLabelsMap( labels, sizes, labelsMap, checkClusterUniq ) )
196
return false;
197
198
for( int i = 0; i < labels.rows; i++ )
199
if( isFlt )
200
err += labels.at<float>(i) != labelsMap[(int)origLabels.at<float>(i)] ? 1.f : 0.f;
201
else
202
err += labels.at<int>(i) != labelsMap[origLabels.at<int>(i)] ? 1.f : 0.f;
203
}
204
else
205
{
206
for( int i = 0; i < labels.rows; i++ )
207
if( isFlt )
208
err += labels.at<float>(i) != origLabels.at<float>(i) ? 1.f : 0.f;
209
else
210
err += labels.at<int>(i) != origLabels.at<int>(i) ? 1.f : 0.f;
211
}
212
err /= (float)labels.rows;
213
return true;
214
}
215
216
//--------------------------------------------------------------------------------------------
217
class CV_KMeansTest : public cvtest::BaseTest {
218
public:
219
CV_KMeansTest() {}
220
protected:
221
virtual void run( int start_from );
222
};
223
224
void CV_KMeansTest::run( int /*start_from*/ )
225
{
226
CV_TRACE_FUNCTION();
227
const int iters = 100;
228
int sizesArr[] = { 5000, 7000, 8000 };
229
int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
230
231
Mat data( pointsCount, 2, CV_32FC1 ), labels;
232
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
233
Mat means;
234
vector<Mat> covs;
235
defaultDistribs( means, covs );
236
generateData( data, labels, sizes, means, covs, CV_32FC1, CV_32SC1 );
237
238
int code = cvtest::TS::OK;
239
float err;
240
Mat bestLabels;
241
// 1. flag==KMEANS_PP_CENTERS
242
kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_PP_CENTERS, noArray() );
243
if( !calcErr( bestLabels, labels, sizes, err , false ) )
244
{
245
ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_PP_CENTERS.\n" );
246
code = cvtest::TS::FAIL_INVALID_OUTPUT;
247
}
248
else if( err > 0.01f )
249
{
250
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_PP_CENTERS.\n", err );
251
code = cvtest::TS::FAIL_BAD_ACCURACY;
252
}
253
254
// 2. flag==KMEANS_RANDOM_CENTERS
255
kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_RANDOM_CENTERS, noArray() );
256
if( !calcErr( bestLabels, labels, sizes, err, false ) )
257
{
258
ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_RANDOM_CENTERS.\n" );
259
code = cvtest::TS::FAIL_INVALID_OUTPUT;
260
}
261
else if( err > 0.01f )
262
{
263
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_RANDOM_CENTERS.\n", err );
264
code = cvtest::TS::FAIL_BAD_ACCURACY;
265
}
266
267
// 3. flag==KMEANS_USE_INITIAL_LABELS
268
labels.copyTo( bestLabels );
269
RNG rng;
270
for( int i = 0; i < 0.5f * pointsCount; i++ )
271
bestLabels.at<int>( rng.next() % pointsCount, 0 ) = rng.next() % 3;
272
kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_USE_INITIAL_LABELS, noArray() );
273
if( !calcErr( bestLabels, labels, sizes, err, false ) )
274
{
275
ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_USE_INITIAL_LABELS.\n" );
276
code = cvtest::TS::FAIL_INVALID_OUTPUT;
277
}
278
else if( err > 0.01f )
279
{
280
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_USE_INITIAL_LABELS.\n", err );
281
code = cvtest::TS::FAIL_BAD_ACCURACY;
282
}
283
284
ts->set_failed_test_info( code );
285
}
286
287
//--------------------------------------------------------------------------------------------
288
class CV_KNearestTest : public cvtest::BaseTest {
289
public:
290
CV_KNearestTest() {}
291
protected:
292
virtual void run( int start_from );
293
};
294
295
void CV_KNearestTest::run( int /*start_from*/ )
296
{
297
int sizesArr[] = { 500, 700, 800 };
298
int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
299
300
// train data
301
Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels;
302
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
303
Mat means;
304
vector<Mat> covs;
305
defaultDistribs( means, covs );
306
generateData( trainData, trainLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
307
308
// test data
309
Mat testData( pointsCount, 2, CV_32FC1 ), testLabels, bestLabels;
310
generateData( testData, testLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
311
312
int code = cvtest::TS::OK;
313
314
// KNearest default implementation
315
Ptr<KNearest> knearest = KNearest::create();
316
knearest->train(trainData, ml::ROW_SAMPLE, trainLabels);
317
knearest->findNearest(testData, 4, bestLabels);
318
float err;
319
if( !calcErr( bestLabels, testLabels, sizes, err, true ) )
320
{
321
ts->printf( cvtest::TS::LOG, "Bad output labels.\n" );
322
code = cvtest::TS::FAIL_INVALID_OUTPUT;
323
}
324
else if( err > 0.01f )
325
{
326
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on test data.\n", err );
327
code = cvtest::TS::FAIL_BAD_ACCURACY;
328
}
329
330
// KNearest KDTree implementation
331
Ptr<KNearest> knearestKdt = KNearest::create();
332
knearestKdt->setAlgorithmType(KNearest::KDTREE);
333
knearestKdt->train(trainData, ml::ROW_SAMPLE, trainLabels);
334
knearestKdt->findNearest(testData, 4, bestLabels);
335
if( !calcErr( bestLabels, testLabels, sizes, err, true ) )
336
{
337
ts->printf( cvtest::TS::LOG, "Bad output labels.\n" );
338
code = cvtest::TS::FAIL_INVALID_OUTPUT;
339
}
340
else if( err > 0.01f )
341
{
342
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on test data.\n", err );
343
code = cvtest::TS::FAIL_BAD_ACCURACY;
344
}
345
346
ts->set_failed_test_info( code );
347
}
348
349
class EM_Params
350
{
351
public:
352
EM_Params(int _nclusters=10, int _covMatType=EM::COV_MAT_DIAGONAL, int _startStep=EM::START_AUTO_STEP,
353
const cv::TermCriteria& _termCrit=cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 100, FLT_EPSILON),
354
const cv::Mat* _probs=0, const cv::Mat* _weights=0,
355
const cv::Mat* _means=0, const std::vector<cv::Mat>* _covs=0)
356
: nclusters(_nclusters), covMatType(_covMatType), startStep(_startStep),
357
probs(_probs), weights(_weights), means(_means), covs(_covs), termCrit(_termCrit)
358
{}
359
360
int nclusters;
361
int covMatType;
362
int startStep;
363
364
// all 4 following matrices should have type CV_32FC1
365
const cv::Mat* probs;
366
const cv::Mat* weights;
367
const cv::Mat* means;
368
const std::vector<cv::Mat>* covs;
369
370
cv::TermCriteria termCrit;
371
};
372
373
//--------------------------------------------------------------------------------------------
374
class CV_EMTest : public cvtest::BaseTest
375
{
376
public:
377
CV_EMTest() {}
378
protected:
379
virtual void run( int start_from );
380
int runCase( int caseIndex, const EM_Params& params,
381
const cv::Mat& trainData, const cv::Mat& trainLabels,
382
const cv::Mat& testData, const cv::Mat& testLabels,
383
const vector<int>& sizes);
384
};
385
386
int CV_EMTest::runCase( int caseIndex, const EM_Params& params,
387
const cv::Mat& trainData, const cv::Mat& trainLabels,
388
const cv::Mat& testData, const cv::Mat& testLabels,
389
const vector<int>& sizes )
390
{
391
int code = cvtest::TS::OK;
392
393
cv::Mat labels;
394
float err;
395
396
Ptr<EM> em = EM::create();
397
em->setClustersNumber(params.nclusters);
398
em->setCovarianceMatrixType(params.covMatType);
399
em->setTermCriteria(params.termCrit);
400
if( params.startStep == EM::START_AUTO_STEP )
401
em->trainEM( trainData, noArray(), labels, noArray() );
402
else if( params.startStep == EM::START_E_STEP )
403
em->trainE( trainData, *params.means, *params.covs,
404
*params.weights, noArray(), labels, noArray() );
405
else if( params.startStep == EM::START_M_STEP )
406
em->trainM( trainData, *params.probs,
407
noArray(), labels, noArray() );
408
409
// check train error
410
if( !calcErr( labels, trainLabels, sizes, err , false, false ) )
411
{
412
ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex );
413
code = cvtest::TS::FAIL_INVALID_OUTPUT;
414
}
415
else if( err > 0.008f )
416
{
417
ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on train data.\n", caseIndex, err );
418
code = cvtest::TS::FAIL_BAD_ACCURACY;
419
}
420
421
// check test error
422
labels.create( testData.rows, 1, CV_32SC1 );
423
for( int i = 0; i < testData.rows; i++ )
424
{
425
Mat sample = testData.row(i);
426
Mat probs;
427
labels.at<int>(i) = static_cast<int>(em->predict2( sample, probs )[1]);
428
}
429
if( !calcErr( labels, testLabels, sizes, err, false, false ) )
430
{
431
ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex );
432
code = cvtest::TS::FAIL_INVALID_OUTPUT;
433
}
434
else if( err > 0.008f )
435
{
436
ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on test data.\n", caseIndex, err );
437
code = cvtest::TS::FAIL_BAD_ACCURACY;
438
}
439
440
return code;
441
}
442
443
void CV_EMTest::run( int /*start_from*/ )
444
{
445
int sizesArr[] = { 500, 700, 800 };
446
int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
447
448
// Points distribution
449
Mat means;
450
vector<Mat> covs;
451
defaultDistribs( means, covs, CV_64FC1 );
452
453
// train data
454
Mat trainData( pointsCount, 2, CV_64FC1 ), trainLabels;
455
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
456
generateData( trainData, trainLabels, sizes, means, covs, CV_64FC1, CV_32SC1 );
457
458
// test data
459
Mat testData( pointsCount, 2, CV_64FC1 ), testLabels;
460
generateData( testData, testLabels, sizes, means, covs, CV_64FC1, CV_32SC1 );
461
462
EM_Params params;
463
params.nclusters = 3;
464
Mat probs(trainData.rows, params.nclusters, CV_64FC1, cv::Scalar(1));
465
params.probs = &probs;
466
Mat weights(1, params.nclusters, CV_64FC1, cv::Scalar(1));
467
params.weights = &weights;
468
params.means = &means;
469
params.covs = &covs;
470
471
int code = cvtest::TS::OK;
472
int caseIndex = 0;
473
{
474
params.startStep = EM::START_AUTO_STEP;
475
params.covMatType = EM::COV_MAT_GENERIC;
476
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
477
code = currCode == cvtest::TS::OK ? code : currCode;
478
}
479
{
480
params.startStep = EM::START_AUTO_STEP;
481
params.covMatType = EM::COV_MAT_DIAGONAL;
482
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
483
code = currCode == cvtest::TS::OK ? code : currCode;
484
}
485
{
486
params.startStep = EM::START_AUTO_STEP;
487
params.covMatType = EM::COV_MAT_SPHERICAL;
488
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
489
code = currCode == cvtest::TS::OK ? code : currCode;
490
}
491
{
492
params.startStep = EM::START_M_STEP;
493
params.covMatType = EM::COV_MAT_GENERIC;
494
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
495
code = currCode == cvtest::TS::OK ? code : currCode;
496
}
497
{
498
params.startStep = EM::START_M_STEP;
499
params.covMatType = EM::COV_MAT_DIAGONAL;
500
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
501
code = currCode == cvtest::TS::OK ? code : currCode;
502
}
503
{
504
params.startStep = EM::START_M_STEP;
505
params.covMatType = EM::COV_MAT_SPHERICAL;
506
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
507
code = currCode == cvtest::TS::OK ? code : currCode;
508
}
509
{
510
params.startStep = EM::START_E_STEP;
511
params.covMatType = EM::COV_MAT_GENERIC;
512
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
513
code = currCode == cvtest::TS::OK ? code : currCode;
514
}
515
{
516
params.startStep = EM::START_E_STEP;
517
params.covMatType = EM::COV_MAT_DIAGONAL;
518
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
519
code = currCode == cvtest::TS::OK ? code : currCode;
520
}
521
{
522
params.startStep = EM::START_E_STEP;
523
params.covMatType = EM::COV_MAT_SPHERICAL;
524
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
525
code = currCode == cvtest::TS::OK ? code : currCode;
526
}
527
528
ts->set_failed_test_info( code );
529
}
530
531
class CV_EMTest_SaveLoad : public cvtest::BaseTest {
532
public:
533
CV_EMTest_SaveLoad() {}
534
protected:
535
virtual void run( int /*start_from*/ )
536
{
537
int code = cvtest::TS::OK;
538
const int nclusters = 2;
539
540
Mat samples = Mat(3,1,CV_64FC1);
541
samples.at<double>(0,0) = 1;
542
samples.at<double>(1,0) = 2;
543
samples.at<double>(2,0) = 3;
544
545
Mat labels;
546
547
Ptr<EM> em = EM::create();
548
em->setClustersNumber(nclusters);
549
em->trainEM(samples, noArray(), labels, noArray());
550
551
Mat firstResult(samples.rows, 1, CV_32SC1);
552
for( int i = 0; i < samples.rows; i++)
553
firstResult.at<int>(i) = static_cast<int>(em->predict2(samples.row(i), noArray())[1]);
554
555
// Write out
556
string filename = cv::tempfile(".xml");
557
{
558
FileStorage fs = FileStorage(filename, FileStorage::WRITE);
559
try
560
{
561
fs << "em" << "{";
562
em->write(fs);
563
fs << "}";
564
}
565
catch(...)
566
{
567
ts->printf( cvtest::TS::LOG, "Crash in write method.\n" );
568
ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION );
569
}
570
}
571
572
em.release();
573
574
// Read in
575
try
576
{
577
em = Algorithm::load<EM>(filename);
578
}
579
catch(...)
580
{
581
ts->printf( cvtest::TS::LOG, "Crash in read method.\n" );
582
ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION );
583
}
584
585
remove( filename.c_str() );
586
587
int errCaseCount = 0;
588
for( int i = 0; i < samples.rows; i++)
589
errCaseCount = std::abs(em->predict2(samples.row(i), noArray())[1] - firstResult.at<int>(i)) < FLT_EPSILON ? 0 : 1;
590
591
if( errCaseCount > 0 )
592
{
593
ts->printf( cvtest::TS::LOG, "Different prediction results before writing and after reading (errCaseCount=%d).\n", errCaseCount );
594
code = cvtest::TS::FAIL_BAD_ACCURACY;
595
}
596
597
ts->set_failed_test_info( code );
598
}
599
};
600
601
class CV_EMTest_Classification : public cvtest::BaseTest
602
{
603
public:
604
CV_EMTest_Classification() {}
605
protected:
606
virtual void run(int)
607
{
608
// This test classifies spam by the following way:
609
// 1. estimates distributions of "spam" / "not spam"
610
// 2. predict classID using Bayes classifier for estimated distributions.
611
612
string dataFilename = string(ts->get_data_path()) + "spambase.data";
613
Ptr<TrainData> data = TrainData::loadFromCSV(dataFilename, 0);
614
615
if( data.empty() )
616
{
617
ts->printf(cvtest::TS::LOG, "File with spambase dataset cann't be read.\n");
618
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
619
return;
620
}
621
622
Mat samples = data->getSamples();
623
CV_Assert(samples.cols == 57);
624
Mat responses = data->getResponses();
625
626
vector<int> trainSamplesMask(samples.rows, 0);
627
int trainSamplesCount = (int)(0.5f * samples.rows);
628
for(int i = 0; i < trainSamplesCount; i++)
629
trainSamplesMask[i] = 1;
630
RNG rng(0);
631
for(size_t i = 0; i < trainSamplesMask.size(); i++)
632
{
633
int i1 = rng(static_cast<unsigned>(trainSamplesMask.size()));
634
int i2 = rng(static_cast<unsigned>(trainSamplesMask.size()));
635
std::swap(trainSamplesMask[i1], trainSamplesMask[i2]);
636
}
637
638
Mat samples0, samples1;
639
for(int i = 0; i < samples.rows; i++)
640
{
641
if(trainSamplesMask[i])
642
{
643
Mat sample = samples.row(i);
644
int resp = (int)responses.at<float>(i);
645
if(resp == 0)
646
samples0.push_back(sample);
647
else
648
samples1.push_back(sample);
649
}
650
}
651
Ptr<EM> model0 = EM::create();
652
model0->setClustersNumber(3);
653
model0->trainEM(samples0, noArray(), noArray(), noArray());
654
655
Ptr<EM> model1 = EM::create();
656
model1->setClustersNumber(3);
657
model1->trainEM(samples1, noArray(), noArray(), noArray());
658
659
Mat trainConfusionMat(2, 2, CV_32SC1, Scalar(0)),
660
testConfusionMat(2, 2, CV_32SC1, Scalar(0));
661
const double lambda = 1.;
662
for(int i = 0; i < samples.rows; i++)
663
{
664
Mat sample = samples.row(i);
665
double sampleLogLikelihoods0 = model0->predict2(sample, noArray())[0];
666
double sampleLogLikelihoods1 = model1->predict2(sample, noArray())[0];
667
668
int classID = sampleLogLikelihoods0 >= lambda * sampleLogLikelihoods1 ? 0 : 1;
669
670
if(trainSamplesMask[i])
671
trainConfusionMat.at<int>((int)responses.at<float>(i), classID)++;
672
else
673
testConfusionMat.at<int>((int)responses.at<float>(i), classID)++;
674
}
675
// std::cout << trainConfusionMat << std::endl;
676
// std::cout << testConfusionMat << std::endl;
677
678
double trainError = (double)(trainConfusionMat.at<int>(1,0) + trainConfusionMat.at<int>(0,1)) / trainSamplesCount;
679
double testError = (double)(testConfusionMat.at<int>(1,0) + testConfusionMat.at<int>(0,1)) / (samples.rows - trainSamplesCount);
680
const double maxTrainError = 0.23;
681
const double maxTestError = 0.26;
682
683
int code = cvtest::TS::OK;
684
if(trainError > maxTrainError)
685
{
686
ts->printf(cvtest::TS::LOG, "Too large train classification error (calc = %f, valid=%f).\n", trainError, maxTrainError);
687
code = cvtest::TS::FAIL_INVALID_TEST_DATA;
688
}
689
if(testError > maxTestError)
690
{
691
ts->printf(cvtest::TS::LOG, "Too large test classification error (calc = %f, valid=%f).\n", testError, maxTestError);
692
code = cvtest::TS::FAIL_INVALID_TEST_DATA;
693
}
694
695
ts->set_failed_test_info(code);
696
}
697
};
698
699
// Google Test entry points: each one builds the matching test object and calls safe_run().
TEST(ML_KMeans, accuracy)
{
    CV_KMeansTest test;
    test.safe_run();
}

TEST(ML_KNearest, accuracy)
{
    CV_KNearestTest test;
    test.safe_run();
}

TEST(ML_EM, accuracy)
{
    CV_EMTest test;
    test.safe_run();
}

TEST(ML_EM, save_load)
{
    CV_EMTest_SaveLoad test;
    test.safe_run();
}

TEST(ML_EM, classification)
{
    CV_EMTest_Classification test;
    test.safe_run();
}
704
705
// Requests more neighbours (16) than there are training samples (5) and checks that the
// neighbour and distance outputs have min(K, training rows) columns, then verifies that
// a K = 2 query still returns the expected labels.
TEST(ML_KNearest, regression_12347)
{
    Mat xTrainData = (Mat_<float>(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1);
    Mat yTrainLabels = (Mat_<float>(5,1) << 1, 1, 2, 2, 2);

    Ptr<KNearest> knn = KNearest::create();
    knn->train(xTrainData, ml::ROW_SAMPLE, yTrainLabels);

    Mat xTestData = (Mat_<float>(2,2) << 1.1, 1.1, 2, 2.2);
    Mat zBestLabels;
    Mat neighbours;
    Mat dist;

    // check output shapes:
    const int oversizedK = 16;
    const int expectedCols = std::min(oversizedK, xTrainData.rows);
    knn->findNearest(xTestData, oversizedK, zBestLabels, neighbours, dist);
    EXPECT_EQ(xTestData.rows, zBestLabels.rows);
    EXPECT_EQ(neighbours.cols, expectedCols);
    EXPECT_EQ(dist.cols, expectedCols);

    // see if the result is still correct:
    const int smallK = 2;
    knn->findNearest(xTestData, smallK, zBestLabels, neighbours, dist);
    EXPECT_EQ(1, zBestLabels.at<float>(0,0));
    EXPECT_EQ(2, zBestLabels.at<float>(1,0));
}
726
727
}} // namespace
728
729