Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/samples/python/letter_recog.py
16337 views
1
#!/usr/bin/env python
2
3
'''
4
The sample demonstrates how to train Random Trees classifier
5
(or Boosting classifier, or MLP, or Knearest, or Support Vector Machines) using the provided dataset.
6
7
We use the sample database letter-recognition.data
8
from UCI Repository, here is the link:
9
10
Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
11
UCI Repository of machine learning databases
12
[http://www.ics.uci.edu/~mlearn/MLRepository.html].
13
Irvine, CA: University of California, Department of Information and Computer Science.
14
15
The dataset consists of 20000 feature vectors along with the
16
responses - capital latin letters A..Z.
17
The first 10000 samples are used for training
18
and the remaining 10000 - to test the classifier.
19
======================================================
20
USAGE:
21
letter_recog.py [--model <model>]
22
[--data <data fn>]
23
[--load <model fn>] [--save <model fn>]
24
25
Models: RTrees, KNearest, Boost, SVM, MLP
26
'''
27
28
# Python 2/3 compatibility
29
from __future__ import print_function
30
31
import numpy as np
32
import cv2 as cv
33
34
def load_base(fn):
    '''Read the letter-recognition CSV file *fn*.

    The first column holds the class label as a capital letter A..Z,
    which is mapped to an integer 0..25; the remaining columns are the
    numeric feature values.

    Returns (samples, responses): a float32 feature matrix and the
    label vector.
    '''
    letter_to_index = lambda ch: ord(ch) - ord('A')
    data = np.loadtxt(fn, np.float32, delimiter=',',
                      converters={0: letter_to_index})
    return data[:, 1:], data[:, 0]
38
39
class LetterStatModel(object):
    '''Common base for the letter classifiers below.

    Holds shared constants, (de)serialization pass-throughs, and helpers
    that expand samples/responses into a one-vs-all layout (used by the
    Boost and MLP models).
    '''
    class_n = 26       # number of classes: capital letters A..Z
    train_ratio = 0.5  # fraction of the dataset used for training

    def load(self, fn):
        self.model.load(fn)

    def save(self, fn):
        self.model.save(fn)

    def unroll_samples(self, samples):
        '''Repeat every sample class_n times, appending the candidate
        class index as one extra feature column.'''
        count, var_n = samples.shape
        unrolled = np.zeros((count * self.class_n, var_n + 1), np.float32)
        unrolled[:, :-1] = np.repeat(samples, self.class_n, axis=0)
        unrolled[:, -1] = np.tile(np.arange(self.class_n), count)
        return unrolled

    def unroll_responses(self, responses):
        '''Encode class labels as a flat one-hot vector of length
        len(responses) * class_n.'''
        count = len(responses)
        unrolled = np.zeros(count * self.class_n, np.int32)
        hot_idx = np.int32(responses + np.arange(count) * self.class_n)
        unrolled[hot_idx] = 1
        return unrolled
61
62
class RTrees(LetterStatModel):
    '''Random Trees (random forest) classifier.'''

    def __init__(self):
        self.model = cv.ml.RTrees_create()

    def train(self, samples, responses):
        self.model.setMaxDepth(20)
        labels = responses.astype(int)
        self.model.train(samples, cv.ml.ROW_SAMPLE, labels)

    def predict(self, samples):
        _status, predictions = self.model.predict(samples)
        return predictions.ravel()
73
74
75
class KNearest(LetterStatModel):
    '''k-nearest-neighbours classifier (k = 10 at prediction time).'''

    def __init__(self):
        self.model = cv.ml.KNearest_create()

    def train(self, samples, responses):
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses)

    def predict(self, samples):
        _retval, results, _neighbours, _distances = self.model.findNearest(samples, k = 10)
        return results.ravel()
85
86
87
class Boost(LetterStatModel):
    '''Boosted trees classifier.

    Boosting is two-class, so the 26-class problem is reduced to a
    binary one by unrolling: each sample is paired with every candidate
    class index, and the response says whether that pairing is correct.
    '''

    def __init__(self):
        self.model = cv.ml.Boost_create()

    def train(self, samples, responses):
        _count, var_n = samples.shape
        unrolled_samples = self.unroll_samples(samples)
        unrolled_responses = self.unroll_responses(responses)
        # original features are numeric; the appended class-index column
        # and the response itself are categorical
        var_types = np.array([cv.ml.VAR_NUMERICAL] * var_n +
                             [cv.ml.VAR_CATEGORICAL, cv.ml.VAR_CATEGORICAL],
                             np.uint8)

        self.model.setWeakCount(15)
        self.model.setMaxDepth(10)
        train_data = cv.ml.TrainData_create(unrolled_samples, cv.ml.ROW_SAMPLE,
                                            unrolled_responses.astype(int),
                                            varType = var_types)
        self.model.train(train_data)

    def predict(self, samples):
        unrolled = self.unroll_samples(samples)
        _status, votes = self.model.predict(unrolled)
        # for every original sample, pick the candidate class with the
        # strongest response
        return votes.ravel().reshape(-1, self.class_n).argmax(1)
106
107
108
class SVM(LetterStatModel):
    '''Support vector machine with an RBF kernel (C-SVC).'''

    def __init__(self):
        self.model = cv.ml.SVM_create()

    def train(self, samples, responses):
        # independent hyper-parameter setters; values match the sample's
        # defaults for this dataset
        self.model.setKernel(cv.ml.SVM_RBF)
        self.model.setGamma(.1)
        self.model.setType(cv.ml.SVM_C_SVC)
        self.model.setC(1)
        self.model.train(samples, cv.ml.ROW_SAMPLE, responses.astype(int))

    def predict(self, samples):
        _status, predictions = self.model.predict(samples)
        return predictions.ravel()
122
123
124
class MLP(LetterStatModel):
    '''Multi-layer perceptron with two hidden layers of 100 units each,
    trained by backpropagation against one-hot targets.'''

    def __init__(self):
        self.model = cv.ml.ANN_MLP_create()

    def train(self, samples, responses):
        _count, var_n = samples.shape
        # one output unit per class; targets are one-hot rows
        targets = self.unroll_responses(responses).reshape(-1, self.class_n)

        self.model.setLayerSizes(np.int32([var_n, 100, 100, self.class_n]))
        self.model.setTrainMethod(cv.ml.ANN_MLP_BACKPROP)
        self.model.setBackpropMomentumScale(0.0)
        self.model.setBackpropWeightScale(0.001)
        self.model.setTermCriteria((cv.TERM_CRITERIA_COUNT, 20, 0.01))
        self.model.setActivationFunction(cv.ml.ANN_MLP_SIGMOID_SYM, 2, 1)

        self.model.train(samples, cv.ml.ROW_SAMPLE, np.float32(targets))

    def predict(self, samples):
        _status, activations = self.model.predict(samples)
        # the class whose output unit fires strongest wins
        return activations.argmax(-1)
145
146
147
148
if __name__ == '__main__':
    import getopt
    import sys

    print(__doc__)

    # map model name (lower-cased class name) -> class
    available = [RTrees, KNearest, Boost, SVM, MLP]  # NBayes
    models = {cls.__name__.lower(): cls for cls in available}

    args, dummy = getopt.getopt(sys.argv[1:], '', ['model=', 'data=', 'load=', 'save='])
    args = dict(args)
    args.setdefault('--model', 'svm')
    args.setdefault('--data', '../data/letter-recognition.data')

    print('loading data %s ...' % args['--data'])
    samples, responses = load_base(args['--data'])
    Model = models[args['--model']]
    model = Model()

    # first train_ratio of the data trains, the remainder tests
    train_n = int(len(samples) * model.train_ratio)
    if '--load' in args:
        fn = args['--load']
        print('loading model from %s ...' % fn)
        model.load(fn)
    else:
        print('training %s ...' % Model.__name__)
        model.train(samples[:train_n], responses[:train_n])

    print('testing...')
    train_rate = np.mean(model.predict(samples[:train_n]) == responses[:train_n].astype(int))
    test_rate = np.mean(model.predict(samples[train_n:]) == responses[train_n:].astype(int))

    print('train rate: %f test rate: %f' % (train_rate*100, test_rate*100))

    if '--save' in args:
        fn = args['--save']
        print('saving model to %s ...' % fn)
        model.save(fn)
    cv.destroyAllWindows()
188
189