Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/samples/python/letter_recog.py
16337 views
1
#!/usr/bin/env python
2
3
'''
4
The sample demonstrates how to train Random Trees classifier
5
(or Boosting classifier, or MLP, or Knearest, or Support Vector Machines) using the provided dataset.
6
7
We use the sample database letter-recognition.data
8
from UCI Repository, here is the link:
9
10
Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
11
UCI Repository of machine learning databases
12
[http://www.ics.uci.edu/~mlearn/MLRepository.html].
13
Irvine, CA: University of California, Department of Information and Computer Science.
14
15
The dataset consists of 20000 feature vectors along with the
16
responses - capital latin letters A..Z.
17
The first 10000 samples are used for training
18
and the remaining 10000 - to test the classifier.
19
======================================================
20
USAGE:
21
letter_recog.py [--model <model>]
22
[--data <data fn>]
23
[--load <model fn>] [--save <model fn>]
24
25
Models: RTrees, KNearest, Boost, SVM, MLP
26
'''
27
28
# Python 2/3 compatibility
29
from __future__ import print_function
30
31
import numpy as np
32
import cv2 as cv
33
34
def load_base(fn):
    '''Read the letter-recognition CSV file *fn*.

    The first column holds the class label as a capital letter A..Z,
    which is mapped to an integer 0..25; the remaining columns are the
    numeric feature values.

    Returns (samples, responses): a float32 feature matrix and the
    label vector.
    '''
    letter_to_index = lambda ch: ord(ch) - ord('A')
    data = np.loadtxt(fn, np.float32, delimiter=',',
                      converters={0: letter_to_index})
    return data[:, 1:], data[:, 0]
38
39
class LetterStatModel(object):
    '''Common base for the letter classifiers below.

    Holds shared constants, (de)serialization pass-throughs, and helpers
    that expand samples/responses into a one-vs-all layout (used by the
    Boost and MLP models).
    '''
    class_n = 26       # number of classes: capital letters A..Z
    train_ratio = 0.5  # fraction of the dataset used for training

    def load(self, fn):
        self.model.load(fn)

    def save(self, fn):
        self.model.save(fn)

    def unroll_samples(self, samples):
        '''Repeat every sample class_n times, appending the candidate
        class index as one extra feature column.'''
        count, var_n = samples.shape
        unrolled = np.zeros((count * self.class_n, var_n + 1), np.float32)
        unrolled[:, :-1] = np.repeat(samples, self.class_n, axis=0)
        unrolled[:, -1] = np.tile(np.arange(self.class_n), count)
        return unrolled

    def unroll_responses(self, responses):
        '''Encode class labels as a flat one-hot vector of length
        len(responses) * class_n.'''
        count = len(responses)
        unrolled = np.zeros(count * self.class_n, np.int32)
        hot_idx = np.int32(responses + np.arange(count) * self.class_n)
        unrolled[hot_idx] = 1
        return unrolled
61
62
class RTrees(LetterStatModel):
    '''Random Trees (random forest) classifier.'''

    def __init__(self):
        self.model = cv.ml.RTrees_create()

    def train(self, samples, responses):
        self.model.setMaxDepth(20)
        labels = responses.astype(int)
        self.model.train(samples, cv.ml.ROW_SAMPLE, labels)

    def predict(self, samples):
        _status, predictions = self.model.predict(samples)
        return predictions.ravel()
73
74
75
class KNearest(LetterStatModel):
    '''k-nearest-neighbours classifier (k = 10 at prediction time).'''

    def __init__(self):
        self.model = cv.ml.KNearest_create()

    def train(self, samples, responses):
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses)

    def predict(self, samples):
        _retval, results, _neighbours, _distances = self.model.findNearest(samples, k = 10)
        return results.ravel()
85
86
87
class Boost(LetterStatModel):
    '''Boosted trees classifier.

    Boosting is two-class, so the 26-class problem is reduced to a
    binary one by unrolling: each sample is paired with every candidate
    class index, and the response says whether that pairing is correct.
    '''

    def __init__(self):
        self.model = cv.ml.Boost_create()

    def train(self, samples, responses):
        _count, var_n = samples.shape
        unrolled_samples = self.unroll_samples(samples)
        unrolled_responses = self.unroll_responses(responses)
        # original features are numeric; the appended class-index column
        # and the response itself are categorical
        var_types = np.array([cv.ml.VAR_NUMERICAL] * var_n +
                             [cv.ml.VAR_CATEGORICAL, cv.ml.VAR_CATEGORICAL],
                             np.uint8)

        self.model.setWeakCount(15)
        self.model.setMaxDepth(10)
        train_data = cv.ml.TrainData_create(unrolled_samples, cv.ml.ROW_SAMPLE,
                                            unrolled_responses.astype(int),
                                            varType = var_types)
        self.model.train(train_data)

    def predict(self, samples):
        unrolled = self.unroll_samples(samples)
        _status, votes = self.model.predict(unrolled)
        # for every original sample, pick the candidate class with the
        # strongest response
        return votes.ravel().reshape(-1, self.class_n).argmax(1)
106
107
108
class SVM(LetterStatModel):
    '''Support vector machine with an RBF kernel (C-SVC).'''

    def __init__(self):
        self.model = cv.ml.SVM_create()

    def train(self, samples, responses):
        # independent hyper-parameter setters; values match the sample's
        # defaults for this dataset
        self.model.setKernel(cv.ml.SVM_RBF)
        self.model.setGamma(.1)
        self.model.setType(cv.ml.SVM_C_SVC)
        self.model.setC(1)
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses.astype(int))

    def predict(self, samples):
        _status, predictions = self.model.predict(samples)
        return predictions.ravel()
122
123
124
class MLP(LetterStatModel):
    '''Multi-layer perceptron with two hidden layers of 100 units each,
    trained by backpropagation against one-hot targets.'''

    def __init__(self):
        self.model = cv.ml.ANN_MLP_create()

    def train(self, samples, responses):
        _count, var_n = samples.shape
        # one output unit per class; targets are one-hot rows
        targets = self.unroll_responses(responses).reshape(-1, self.class_n)

        self.model.setLayerSizes(np.int32([var_n, 100, 100, self.class_n]))
        self.model.setTrainMethod(cv.ml.ANN_MLP_BACKPROP)
        self.model.setBackpropMomentumScale(0.0)
        self.model.setBackpropWeightScale(0.001)
        self.model.setTermCriteria((cv.TERM_CRITERIA_COUNT, 20, 0.01))
        self.model.setActivationFunction(cv.ml.ANN_MLP_SIGMOID_SYM, 2, 1)

        self.model.train(samples, cv.ml.ROW_SAMPLE, np.float32(targets))

    def predict(self, samples):
        _status, activations = self.model.predict(samples)
        # the class whose output unit fires strongest wins
        return activations.argmax(-1)
145
146
147
148
if __name__ == '__main__':
    import getopt
    import sys

    print(__doc__)

    # map model name (lower-cased class name) -> class
    available = [RTrees, KNearest, Boost, SVM, MLP]  # NBayes
    models = {cls.__name__.lower(): cls for cls in available}

    args, dummy = getopt.getopt(sys.argv[1:], '', ['model=', 'data=', 'load=', 'save='])
    args = dict(args)
    args.setdefault('--model', 'svm')
    args.setdefault('--data', '../data/letter-recognition.data')

    print('loading data %s ...' % args['--data'])
    samples, responses = load_base(args['--data'])
    Model = models[args['--model']]
    model = Model()

    # first train_ratio of the data trains, the remainder tests
    train_n = int(len(samples) * model.train_ratio)
    if '--load' in args:
        fn = args['--load']
        print('loading model from %s ...' % fn)
        model.load(fn)
    else:
        print('training %s ...' % Model.__name__)
        model.train(samples[:train_n], responses[:train_n])

    print('testing...')
    train_rate = np.mean(model.predict(samples[:train_n]) == responses[:train_n].astype(int))
    test_rate = np.mean(model.predict(samples[train_n:]) == responses[train_n:].astype(int))

    print('train rate: %f test rate: %f' % (train_rate*100, test_rate*100))

    if '--save' in args:
        fn = args['--save']
        print('saving model to %s ...' % fn)
        model.save(fn)
    cv.destroyAllWindows()
188
189