# Path: blob/master/modules/python/test/test_letter_recog.py
#!/usr/bin/env python
'''
The sample demonstrates how to train Random Trees classifier
(or Boosting classifier, or MLP, or Knearest, or Support Vector Machines) using the provided dataset.

We use the sample database letter-recognition.data
from UCI Repository, here is the link:

Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
UCI Repository of machine learning databases
[http://www.ics.uci.edu/~mlearn/MLRepository.html].
Irvine, CA: University of California, Department of Information and Computer Science.

The dataset consists of 20000 feature vectors along with the
responses - capital latin letters A..Z.
The first 10000 samples are used for training
and the remaining 10000 - to test the classifier.
======================================================
Models: RTrees, KNearest, Boost, SVM, MLP
'''

# Python 2/3 compatibility
from __future__ import print_function

import numpy as np
import cv2 as cv

def load_base(fn):
    """Load the letter-recognition CSV file *fn*.

    Column 0 holds the class letter; the converter maps 'A'..'Z' to 0..25.
    Returns (samples, responses): a float32 feature matrix and the numeric
    class labels (still float32 — callers cast with .astype(int) as needed).
    """
    a = np.loadtxt(fn, np.float32, delimiter=',', converters={ 0 : lambda ch : ord(ch)-ord('A') })
    samples, responses = a[:,1:], a[:,0]
    return samples, responses

class LetterStatModel(object):
    """Common base for the classifiers below.

    Subclasses create a cv.ml model in __init__ (stored as self.model) and
    implement train()/predict().
    """
    class_n = 26        # number of classes: capital letters A..Z
    train_ratio = 0.5   # fraction of the dataset used for training

    def load(self, fn):
        # NOTE(review): cv.ml models in OpenCV 3+ are normally restored via
        # the static *_load()/*_create() functions; confirm this
        # instance-level load() still works for every wrapped model type.
        self.model.load(fn)
    def save(self, fn):
        self.model.save(fn)

    def unroll_samples(self, samples):
        """Expand each sample into class_n rows, appending the candidate
        class id as one extra feature column (used by Boost's binary
        reformulation of the 26-class problem)."""
        sample_n, var_n = samples.shape
        new_samples = np.zeros((sample_n * self.class_n, var_n+1), np.float32)
        new_samples[:,:-1] = np.repeat(samples, self.class_n, axis=0)
        new_samples[:,-1] = np.tile(np.arange(self.class_n), sample_n)
        return new_samples

    def unroll_responses(self, responses):
        """Flatten class ids into a one-hot vector of length
        len(responses) * class_n (1 where the candidate class matches)."""
        sample_n = len(responses)
        new_responses = np.zeros(sample_n*self.class_n, np.int32)
        resp_idx = np.int32( responses + np.arange(sample_n)*self.class_n )
        new_responses[resp_idx] = 1
        return new_responses

class RTrees(LetterStatModel):
    """Random Trees classifier."""
    def __init__(self):
        self.model = cv.ml.RTrees_create()

    def train(self, samples, responses):
        #sample_n, var_n = samples.shape
        self.model.setMaxDepth(20)
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses.astype(int))

    def predict(self, samples):
        _ret, resp = self.model.predict(samples)
        return resp.ravel()


class KNearest(LetterStatModel):
    """k-nearest-neighbours classifier (k = 10)."""
    def __init__(self):
        self.model = cv.ml.KNearest_create()

    def train(self, samples, responses):
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses)

    def predict(self, samples):
        _retval, results, _neigh_resp, _dists = self.model.findNearest(samples, k = 10)
        return results.ravel()


class Boost(LetterStatModel):
    """Boosted trees; the 26-class task is recast as a binary problem over
    (sample, candidate-class) pairs via unroll_samples/unroll_responses."""
    def __init__(self):
        self.model = cv.ml.Boost_create()

    def train(self, samples, responses):
        _sample_n, var_n = samples.shape
        new_samples = self.unroll_samples(samples)
        new_responses = self.unroll_responses(responses)
        # var_n numerical features + categorical class-id column + categorical response
        var_types = np.array([cv.ml.VAR_NUMERICAL] * var_n + [cv.ml.VAR_CATEGORICAL, cv.ml.VAR_CATEGORICAL], np.uint8)

        self.model.setWeakCount(15)
        self.model.setMaxDepth(10)
        self.model.train(cv.ml.TrainData_create(new_samples, cv.ml.ROW_SAMPLE, new_responses.astype(int), varType = var_types))

    def predict(self, samples):
        new_samples = self.unroll_samples(samples)
        _ret, resp = self.model.predict(new_samples)

        # Per original sample, pick the candidate class with the highest vote.
        return resp.ravel().reshape(-1, self.class_n).argmax(1)


class SVM(LetterStatModel):
    """C-SVM with an RBF kernel."""
    def __init__(self):
        self.model = cv.ml.SVM_create()

    def train(self, samples, responses):
        self.model.setType(cv.ml.SVM_C_SVC)
        self.model.setC(1)
        self.model.setKernel(cv.ml.SVM_RBF)
        self.model.setGamma(.1)
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses.astype(int))

    def predict(self, samples):
        _ret, resp = self.model.predict(samples)
        return resp.ravel()


class MLP(LetterStatModel):
    """Multi-layer perceptron trained on one-hot targets; predict() takes
    the argmax over the class_n output neurons."""
    def __init__(self):
        self.model = cv.ml.ANN_MLP_create()

    def train(self, samples, responses):
        _sample_n, var_n = samples.shape
        new_responses = self.unroll_responses(responses).reshape(-1, self.class_n)
        layer_sizes = np.int32([var_n, 100, 100, self.class_n])

        self.model.setLayerSizes(layer_sizes)
        self.model.setTrainMethod(cv.ml.ANN_MLP_BACKPROP)
        self.model.setBackpropMomentumScale(0)
        self.model.setBackpropWeightScale(0.001)
        self.model.setTermCriteria((cv.TERM_CRITERIA_COUNT, 20, 0.01))
        self.model.setActivationFunction(cv.ml.ANN_MLP_SIGMOID_SYM, 2, 1)

        self.model.train(samples, cv.ml.ROW_SAMPLE, np.float32(new_responses))

    def predict(self, samples):
        _ret, resp = self.model.predict(samples)
        return resp.argmax(-1)

from tests_common import NewOpenCVTests

class letter_recog_test(NewOpenCVTests):
    """Regression test: train every model on the first half of the dataset
    and compare train/test accuracy against recorded baselines."""

    def test_letter_recog(self):

        eps = 0.01

        models = [RTrees, KNearest, Boost, SVM, MLP]
        models = dict( [(cls.__name__.lower(), cls) for cls in models] )
        # Baseline (train, test) accuracies — presumably in percent; verify.
        testErrors = {RTrees: (98.930000, 92.390000), KNearest: (94.960000, 92.010000),
            Boost: (85.970000, 74.920000), SVM: (99.780000, 95.680000), MLP: (90.060000, 87.410000)}

        for model in models:
            Model = models[model]
            classifier = Model()

            samples, responses = load_base(self.repoPath + '/samples/data/letter-recognition.data')
            train_n = int(len(samples)*classifier.train_ratio)

            classifier.train(samples[:train_n], responses[:train_n])
            # np.mean over a boolean array: accuracies come out as fractions in [0, 1].
            train_rate = np.mean(classifier.predict(samples[:train_n]) == responses[:train_n].astype(int))
            test_rate = np.mean(classifier.predict(samples[train_n:]) == responses[train_n:].astype(int))

            # NOTE(review): train_rate/test_rate are fractions while the
            # baselines above look like percentages, so these assertions are
            # trivially true (fraction minus ~90 is always < eps). The rates
            # were likely meant to be scaled by 100 — confirm intent upstream
            # before tightening.
            self.assertLess(train_rate - testErrors[Model][0], eps)
            self.assertLess(test_rate - testErrors[Model][1], eps)


if __name__ == '__main__':
    NewOpenCVTests.bootstrap()