# aimacode/aima-python: learning.py
"""Learning from examples (Chapter 18)"""

import copy
import heapq
import random
from collections import defaultdict
from statistics import stdev

import numpy as np
from qpsolvers import solve_qp

from probabilistic_learning import NaiveBayesLearner
from utils import *


class DataSet:
    """
    A data set for a machine learning problem. It has the following fields:

    d.examples   A list of examples. Each one is a list of attribute values.
    d.attrs      A list of integers to index into an example, so example[attr]
                 gives a value. Normally the same as range(len(d.examples[0])).
    d.attr_names Optional list of mnemonic names for corresponding attrs.
    d.target     The attribute that a learning algorithm will try to predict.
                 By default the final attribute.
    d.inputs     The list of attrs without the target.
    d.values     A list of lists: each sublist is the set of possible
                 values for the corresponding attribute. If initially None,
                 it is computed from the known examples by self.set_problem.
                 If not None, an erroneous value raises ValueError.
    d.distance   A function from a pair of examples to a non-negative number.
                 Should be symmetric, etc. Defaults to mean_boolean_error
                 since that can handle any field types.
    d.name       Name of the data set (for output display only).
    d.source     URL or other source where the data came from.
    d.exclude    A list of attribute indexes to exclude from d.inputs. Elements
                 of this list can either be integers (attrs) or attr_names.

    Normally, you call the constructor and you're done; then you just
    access fields like d.examples and d.target and d.inputs.
    """

    def __init__(self, examples=None, attrs=None, attr_names=None, target=-1, inputs=None,
                 values=None, distance=mean_boolean_error, name='', source='', exclude=()):
        """
        Accepts any of DataSet's fields. Examples can also be a
        string or file from which to parse examples using parse_csv.
        Optional parameter: exclude, as documented in .set_problem().
        >>> DataSet(examples='1, 2, 3')
        <DataSet(): 1 examples, 3 attributes>
        """
        self.name = name
        self.source = source
        self.values = values
        self.distance = distance
        self.got_values_flag = bool(values)

        # initialize .examples from string or list or data directory
        if isinstance(examples, str):
            self.examples = parse_csv(examples)
        elif examples is None:
            self.examples = parse_csv(open_data(name + '.csv').read())
        else:
            self.examples = examples

        # attrs are the indices of examples, unless otherwise stated.
        if self.examples is not None and attrs is None:
            attrs = list(range(len(self.examples[0])))

        self.attrs = attrs

        # initialize .attr_names from string, list, or by default
        if isinstance(attr_names, str):
            self.attr_names = attr_names.split()
        else:
            self.attr_names = attr_names or attrs
        self.set_problem(target, inputs=inputs, exclude=exclude)

    def set_problem(self, target, inputs=None, exclude=()):
        """
        Set (or change) the target and/or inputs.
        This way, one DataSet can be used multiple ways. inputs, if specified,
        is a list of attributes, or specify exclude as a list of attributes
        to not use in inputs. Attributes can be -n .. n, or an attr_name.
        Also computes the list of possible values, if that wasn't done yet.
        """
        self.target = self.attr_num(target)
        exclude = list(map(self.attr_num, exclude))
        if inputs:
            self.inputs = remove_all(self.target, inputs)
        else:
            self.inputs = [a for a in self.attrs if a != self.target and a not in exclude]
        if not self.values:
            self.update_values()
        self.check_me()

    def check_me(self):
        """Check that my fields make sense."""
        assert len(self.attr_names) == len(self.attrs)
        assert self.target in self.attrs
        assert self.target not in self.inputs
        assert set(self.inputs).issubset(set(self.attrs))
        if self.got_values_flag:
            # only check if values are provided while initializing DataSet
            list(map(self.check_example, self.examples))

    def add_example(self, example):
        """Add an example to the list of examples, checking it first."""
        self.check_example(example)
        self.examples.append(example)

    def check_example(self, example):
        """Raise ValueError if example has any invalid values."""
        if self.values:
            for a in self.attrs:
                if example[a] not in self.values[a]:
                    raise ValueError('Bad value {} for attribute {} in {}'
                                     .format(example[a], self.attr_names[a], example))

    def attr_num(self, attr):
        """Returns the number used for attr, which can be a name, or -n .. n-1."""
        if isinstance(attr, str):
            return self.attr_names.index(attr)
        elif attr < 0:
            return len(self.attrs) + attr
        else:
            return attr

    def update_values(self):
        self.values = list(map(unique, zip(*self.examples)))

    def sanitize(self, example):
        """Return a copy of example, with non-input attributes replaced by None."""
        return [attr_i if i in self.inputs else None for i, attr_i in enumerate(example)]

    def classes_to_numbers(self, classes=None):
        """Converts class names to numbers."""
        if not classes:
            # if classes were not given, extract them from values
            classes = sorted(self.values[self.target])
        for item in self.examples:
            item[self.target] = classes.index(item[self.target])

    def remove_examples(self, value=''):
        """Remove examples that contain given value."""
        self.examples = [x for x in self.examples if value not in x]
        self.update_values()

    def split_values_by_classes(self):
        """Split values into buckets according to their class."""
        buckets = defaultdict(lambda: [])
        target_names = self.values[self.target]

        for v in self.examples:
            item = [a for a in v if a not in target_names]  # remove target from item
            buckets[v[self.target]].append(item)  # add item to bucket of its class

        return buckets

    def find_means_and_deviations(self):
        """
        Finds the per-class means and standard deviations of this dataset's features.
        means: a dictionary keyed by class/target value, holding a list of the
               means of the features for that class.
        deviations: a dictionary keyed by class/target value, holding a list of
                    the sample standard deviations of the features for that class.
        """
        target_names = self.values[self.target]
        feature_numbers = len(self.inputs)

        item_buckets = self.split_values_by_classes()

        means = defaultdict(lambda: [0] * feature_numbers)
        deviations = defaultdict(lambda: [0] * feature_numbers)

        for t in target_names:
            # find all the item feature values for item in class t
            features = [[] for _ in range(feature_numbers)]
            for item in item_buckets[t]:
                for i in range(feature_numbers):
                    features[i].append(item[i])

            # calculate means and deviations of the class
            for i in range(feature_numbers):
                means[t][i] = mean(features[i])
                deviations[t][i] = stdev(features[i])

        return means, deviations

    def __repr__(self):
        return '<DataSet({}): {:d} examples, {:d} attributes>'.format(self.name, len(self.examples), len(self.attrs))


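# A hedged usage sketch for DataSet. The zoo/iris/restaurant datasets built near
# the end of this module are constructed exactly this way; the inline-CSV variant
# below avoids needing a data file:
#
#     d = DataSet(examples='1, 2, 0\n2, 1, 1', attr_names='A B Class', target='Class')
#     d.examples            # -> [[1, 2, 0], [2, 1, 1]]
#     d.inputs              # -> [0, 1]
#     d.values[d.target]    # -> the observed class values, e.g. [0, 1]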
def parse_csv(input, delim=','):
    r"""
    Input is a string consisting of lines, each line has comma-delimited
    fields. Convert this into a list of lists. Blank lines are skipped.
    Fields that look like numbers are converted to numbers.
    The delim defaults to ',' but '\t' and None are also reasonable values.
    >>> parse_csv('1, 2, 3 \n 0, 2, na')
    [[1, 2, 3], [0, 2, 'na']]
    """
    lines = [line for line in input.splitlines() if line.strip()]
    return [list(map(num_or_str, line.split(delim))) for line in lines]


def err_ratio(predict, dataset, examples=None):
    """Return the proportion of the examples that are NOT correctly predicted."""
    examples = examples or dataset.examples
    if len(examples) == 0:
        return 0.0
    right = 0
    for example in examples:
        desired = example[dataset.target]
        output = predict(dataset.sanitize(example))
        if output == desired:
            right += 1
    return 1 - (right / len(examples))


def grade_learner(predict, tests):
    """
    Grades the given learner based on how many tests it passes.
    tests is a list with each element in the form: (values, output).
    """
    return mean(int(predict(X) == y) for X, y in tests)


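# Hedged usage sketch: err_ratio scores a predictor on a DataSet, while
# grade_learner scores it on hand-written (input, output) test pairs. The
# predictor and tests below are made up for illustration:
#
#     always_none = lambda example: None
#     grade_learner(always_none, [([1, 2], None), ([3, 4], 'No')])   # -> 0.5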
def train_test_split(dataset, start=None, end=None, test_split=None):
    """
    If 'start' and 'end' are given, the examples from index 'start' to 'end'
    become the test set and the rest become the training set.
    If 'test_split' is given, that fraction of the examples (taken from the
    end) becomes the test set and the rest becomes the training set.
    """
    examples = dataset.examples
    if test_split is None:
        train = examples[:start] + examples[end:]
        val = examples[start:end]
    else:
        total_size = len(examples)
        val_size = int(total_size * test_split)
        train_size = total_size - val_size
        train = examples[:train_size]
        val = examples[train_size:total_size]

    return train, val


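# Hedged usage sketch (iris is loaded near the end of this module):
#
#     train, val = train_test_split(iris, test_split=0.2)   # last 20% as validation
#     train, val = train_test_split(iris, 0, 30)            # examples 0..29 as validation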
def cross_validation_wrapper(learner, dataset, k=10, trials=1):
    """
    [Figure 18.8]
    Return the optimal value of size having minimum error on the validation set.
    errs_t: training errors, indexed by size
    errs_v: validation errors, indexed by size
    """
    errs_t, errs_v = [], []
    size = 1
    while True:
        errT, errV = cross_validation(learner, dataset, size, k, trials)
        errs_t.append(errT)
        errs_v.append(errV)
        # stop once the training error has converged (no longer changes with size)
        if len(errs_t) > 1 and np.isclose(errs_t[-1], errs_t[-2], rtol=1e-6):
            # pick the size whose validation error is smallest
            best_size = 1 + min(range(len(errs_v)), key=lambda i: errs_v[i])
            return learner(dataset, best_size)
        size += 1


def cross_validation(learner, dataset, size=None, k=10, trials=1):
    """
    Do k-fold cross-validation and return the mean errors.
    That is, keep out 1/k of the examples for testing on each of k runs.
    Shuffle the examples first; if trials > 1, average over several shuffles.
    Returns Training error, Validation error
    """
    k = k or len(dataset.examples)
    if trials > 1:
        trial_errT = 0
        trial_errV = 0
        for t in range(trials):
            # each trial is a single k-fold run over a fresh shuffle
            errT, errV = cross_validation(learner, dataset, size, k, trials=1)
            trial_errT += errT
            trial_errV += errV
        return trial_errT / trials, trial_errV / trials
    else:
        fold_errT = 0
        fold_errV = 0
        n = len(dataset.examples)
        examples = dataset.examples
        random.shuffle(dataset.examples)
        for fold in range(k):
            train_data, val_data = train_test_split(dataset, fold * (n // k), (fold + 1) * (n // k))
            dataset.examples = train_data
            h = learner(dataset, size)
            fold_errT += err_ratio(h, dataset, train_data)
            fold_errV += err_ratio(h, dataset, val_data)
            # reverting back to original once test is completed
            dataset.examples = examples
        return fold_errT / k, fold_errV / k


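# Hedged usage sketch. cross_validation passes `size` through to the learner, so
# wrap learners that do not take a size argument (restaurant is built near the
# end of this module):
#
#     def tree_learner(dataset, size=None):
#         return DecisionTreeLearner(dataset)
#
#     errT, errV = cross_validation(tree_learner, restaurant, k=4)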
def leave_one_out(learner, dataset, size=None):
    """Leave one out cross-validation over the dataset."""
    return cross_validation(learner, dataset, size, len(dataset.examples))


def learning_curve(learner, dataset, trials=10, sizes=None):
    """Return [(size, mean validation error)] pairs for increasing training-set sizes."""
    if sizes is None:
        sizes = list(range(2, len(dataset.examples) - trials, 2))

    def score(learner, size):
        random.shuffle(dataset.examples)
        _, errV = cross_validation(learner, dataset, size, k=trials)
        return errV

    return [(size, mean([score(learner, size) for _ in range(trials)])) for size in sizes]


def PluralityLearner(dataset):
    """
    A very dumb algorithm: always pick the result that was most popular
    in the training data. Makes a baseline for comparison.
    """
    most_popular = mode([e[dataset.target] for e in dataset.examples])

    def predict(example):
        """Always return same result: the most popular from the training set."""
        return most_popular

    return predict


class DecisionFork:
    """
    A fork of a decision tree holds an attribute to test, and a dict
    of branches, one for each of the attribute's values.
    """

    def __init__(self, attr, attr_name=None, default_child=None, branches=None):
        """Initialize by saying what attribute this node tests."""
        self.attr = attr
        self.attr_name = attr_name or attr
        self.default_child = default_child
        self.branches = branches or {}

    def __call__(self, example):
        """Given an example, classify it using the attribute and the branches."""
        attr_val = example[self.attr]
        if attr_val in self.branches:
            return self.branches[attr_val](example)
        else:
            # return default class when attribute is unknown
            return self.default_child(example)

    def add(self, val, subtree):
        """Add a branch. If self.attr = val, go to the given subtree."""
        self.branches[val] = subtree

    def display(self, indent=0):
        name = self.attr_name
        print('Test', name)
        for (val, subtree) in self.branches.items():
            print(' ' * 4 * indent, name, '=', val, '==>', end=' ')
            subtree.display(indent + 1)

    def __repr__(self):
        return 'DecisionFork({0!r}, {1!r}, {2!r})'.format(self.attr, self.attr_name, self.branches)


class DecisionLeaf:
    """A leaf of a decision tree holds just a result."""

    def __init__(self, result):
        self.result = result

    def __call__(self, example):
        return self.result

    def display(self, indent=0):
        # indent is accepted (and ignored) so DecisionFork.display can recurse into leaves
        print('RESULT =', self.result)

    def __repr__(self):
        return repr(self.result)


def DecisionTreeLearner(dataset):
    """[Figure 18.5]"""

    target, values = dataset.target, dataset.values

    def decision_tree_learning(examples, attrs, parent_examples=()):
        if len(examples) == 0:
            return plurality_value(parent_examples)
        if all_same_class(examples):
            return DecisionLeaf(examples[0][target])
        if len(attrs) == 0:
            return plurality_value(examples)
        A = choose_attribute(attrs, examples)
        tree = DecisionFork(A, dataset.attr_names[A], plurality_value(examples))
        for (v_k, exs) in split_by(A, examples):
            subtree = decision_tree_learning(exs, remove_all(A, attrs), examples)
            tree.add(v_k, subtree)
        return tree

    def plurality_value(examples):
        """
        Return the most popular target value for this set of examples.
        (If target is binary, this is the majority; otherwise plurality).
        """
        popular = argmax_random_tie(values[target], key=lambda v: count(target, v, examples))
        return DecisionLeaf(popular)

    def count(attr, val, examples):
        """Count the number of examples that have example[attr] = val."""
        return sum(e[attr] == val for e in examples)

    def all_same_class(examples):
        """Are all these examples in the same target class?"""
        class0 = examples[0][target]
        return all(e[target] == class0 for e in examples)

    def choose_attribute(attrs, examples):
        """Choose the attribute with the highest information gain."""
        return argmax_random_tie(attrs, key=lambda a: information_gain(a, examples))

    def information_gain(attr, examples):
        """Return the expected reduction in entropy from splitting by attr."""

        def I(examples):
            return information_content([count(target, v, examples) for v in values[target]])

        n = len(examples)
        remainder = sum((len(examples_i) / n) * I(examples_i) for (v, examples_i) in split_by(attr, examples))
        return I(examples) - remainder

    def split_by(attr, examples):
        """Return a list of (val, examples) pairs for each val of attr."""
        return [(v, [e for e in examples if e[attr] == v]) for v in values[attr]]

    return decision_tree_learning(dataset.examples, dataset.inputs)


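# Hedged usage sketch (restaurant is the Figure 18.3 DataSet built near the end
# of this module):
#
#     tree = DecisionTreeLearner(restaurant)
#     tree.display()                  # print the learned tree
#     tree(restaurant.examples[0])    # classify a single example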
def information_content(values):
    """Number of bits to represent the probability distribution in values."""
    probabilities = normalize(remove_all(0, values))
    return sum(-p * np.log2(p) for p in probabilities)


def DecisionListLearner(dataset):
    """
    [Figure 18.11]
    A decision list implemented as a list of (test, value) pairs.
    """

    def decision_list_learning(examples):
        if not examples:
            return [(True, False)]
        t, o, examples_t = find_examples(examples)
        if not t:
            raise Exception
        return [(t, o)] + decision_list_learning(examples - examples_t)

    def find_examples(examples):
        """
        Find a set of examples that all have the same outcome under
        some test. Return a tuple of the test, outcome, and examples.
        """
        raise NotImplementedError

    def passes(example, test):
        """Does the example pass the test?"""
        raise NotImplementedError

    def predict(example):
        """Predict the outcome for the first passing test."""
        for test, outcome in predict.decision_list:
            if passes(example, test):
                return outcome

    predict.decision_list = decision_list_learning(set(dataset.examples))

    return predict


def NearestNeighborLearner(dataset, k=1):
    """k-NearestNeighbor: the k nearest neighbors vote."""

    def predict(example):
        """Find the k closest items, and have them vote for the best."""
        best = heapq.nsmallest(k, ((dataset.distance(e, example), e) for e in dataset.examples))
        return mode(e[dataset.target] for (d, e) in best)

    return predict


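# Hedged usage sketch (iris is loaded near the end of this module; sanitize
# blanks out the target attribute, as err_ratio does before predicting):
#
#     knn = NearestNeighborLearner(iris, k=3)
#     knn(iris.sanitize(iris.examples[0]))   # -> predicted class label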
def LinearLearner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.3]
    Linear classifier with hard threshold.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose: one column (a list over all examples) per input attribute
    X_col = [[example[i] for example in examples] for i in idx_i]

    # add a dummy column of 1s for the intercept weight
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        # pass over all examples
        for example in examples:
            x = [1] + [example[i] for i in idx_i]
            y = np.dot(w, x)
            t = example[idx_t]
            err.append(t - y)

        # batch gradient descent update of the weights
        for i in range(len(w)):
            w[i] = w[i] + learning_rate * (np.dot(err, X_col[i]) / num_examples)

    def predict(example):
        x = [1] + [example[i] for i in idx_i]
        return np.dot(w, x)

    return predict


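# Hedged usage sketch: the target must already be numeric, e.g. after
# classes_to_numbers (learning rate and epochs are illustrative):
#
#     iris.classes_to_numbers()
#     h = LinearLearner(iris, learning_rate=0.05, epochs=200)
#     h(iris.examples[0])   # -> a real-valued score for the first example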
def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.4]
    Linear classifier with logistic regression.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose: one column (a list over all examples) per input attribute
    X_col = [[example[i] for example in examples] for i in idx_i]

    # add a dummy column of 1s for the intercept weight
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        h = []
        # pass over all examples
        for example in examples:
            x = [1] + [example[i] for i in idx_i]
            y = sigmoid(np.dot(w, x))
            h.append(sigmoid_derivative(y))
            t = example[idx_t]
            err.append(t - y)

        # batch gradient descent update of the weights
        for i in range(len(w)):
            buffer = [x * y for x, y in zip(err, h)]
            w[i] = w[i] + learning_rate * (np.dot(buffer, X_col[i]) / num_examples)

    def predict(example):
        x = [1] + [example[i] for i in idx_i]
        return sigmoid(np.dot(w, x))

    return predict


def NeuralNetLearner(dataset, hidden_layer_sizes=None, learning_rate=0.01, epochs=100, activation=sigmoid):
    """
    Layered feed-forward network.
    hidden_layer_sizes: List of number of hidden units per hidden layer
    learning_rate: Learning rate of gradient descent
    epochs: Number of passes over the dataset
    """

    if hidden_layer_sizes is None:
        hidden_layer_sizes = [3]
    i_units = len(dataset.inputs)
    o_units = len(dataset.values[dataset.target])

    # construct a network
    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)
    learned_net = BackPropagationLearner(dataset, raw_net, learning_rate, epochs, activation)

    def predict(example):
        # input nodes
        i_nodes = learned_net[0]

        # activate input layer
        for v, n in zip(example, i_nodes):
            n.value = v

        # forward pass
        for layer in learned_net[1:]:
            for node in layer:
                inc = [n.value for n in node.inputs]
                in_val = dot_product(inc, node.weights)
                node.value = node.activation(in_val)

        # hypothesis
        o_nodes = learned_net[-1]
        prediction = find_max_node(o_nodes)
        return prediction

    return predict


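# Hedged usage sketch (iris is loaded near the end of this module;
# classes_to_numbers turns the target into a 0-based index, which
# init_examples then one-hot encodes):
#
#     iris.classes_to_numbers()
#     nn = NeuralNetLearner(iris, hidden_layer_sizes=[5], learning_rate=0.1, epochs=50)
#     nn(iris.sanitize(iris.examples[0]))   # -> index of the predicted class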
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
    """
    [Figure 18.23]
    The back-propagation algorithm for multilayer networks.
    """
    # initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5, num_weights=len(node.weights))

    examples = dataset.examples
    # As of now dataset.target gives an int instead of list,
    # Changing dataset class will have effect on all the learners.
    # Will be taken care of later.
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dot_product(inc, node.weights)
                    node.value = node.activation(in_val)

            # initialize delta
            delta = [[] for _ in range(n_layers)]

            # compute outer layer delta

            # error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # calculate delta at output
            if node.activation == sigmoid:
                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == relu:
                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == tanh:
                delta[-1] = [tanh_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == elu:
                delta[-1] = [elu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == leaky_relu:
                delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            else:
                raise ValueError("Activation function unknown.")

            # backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [sigmoid_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == relu:
                    delta[i] = [relu_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == tanh:
                    delta[i] = [tanh_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == elu:
                    delta[i] = [elu_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == leaky_relu:
                    delta[i] = [leaky_relu_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                else:
                    raise ValueError("Activation function unknown.")

            # update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(layer[j].weights,
                                                  scalar_vector_product(learning_rate * delta[i][j], inc))

    return net


def PerceptronLearner(dataset, learning_rate=0.01, epochs=100):
    """Logistic Regression, NO hidden layer"""
    i_units = len(dataset.inputs)
    o_units = len(dataset.values[dataset.target])
    hidden_layer_sizes = []
    raw_net = network(i_units, hidden_layer_sizes, o_units)
    learned_net = BackPropagationLearner(dataset, raw_net, learning_rate, epochs)

    def predict(example):
        o_nodes = learned_net[1]

        # forward pass
        for node in o_nodes:
            in_val = dot_product(example, node.weights)
            node.value = node.activation(in_val)

        # hypothesis
        return find_max_node(o_nodes)

    return predict


class NNUnit:
    """
    Single unit of a multi-layer neural network.
    inputs: incoming connections
    weights: weights on the incoming connections
    """

    def __init__(self, activation=sigmoid, weights=None, inputs=None):
        self.weights = weights or []
        self.inputs = inputs or []
        self.value = None
        self.activation = activation


def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid):
    """
    Create a directed acyclic network with the given number of layers.
    hidden_layer_sizes: list of the number of units in each hidden layer,
    excluding the input and output layers.
    """
    layers_sizes = [input_units] + hidden_layer_sizes + [output_units]

    net = [[NNUnit(activation) for _ in range(size)] for size in layers_sizes]
    n_layers = len(net)

    # make connection
    for i in range(1, n_layers):
        for n in net[i]:
            for k in net[i - 1]:
                n.inputs.append(k)
                n.weights.append(0)
    return net


def init_examples(examples, idx_i, idx_t, o_units):
    inputs, targets = {}, {}

    for i, e in enumerate(examples):
        # input values of e
        inputs[i] = [e[attr] for attr in idx_i]

        if o_units > 1:
            # one-hot representation of e's target
            t = [0 for _ in range(o_units)]
            t[e[idx_t]] = 1
            targets[i] = t
        else:
            # target value of e
            targets[i] = [e[idx_t]]

    return inputs, targets


def find_max_node(nodes):
    """Return the index of the node with the highest value."""
    return nodes.index(max(nodes, key=lambda node: node.value))


class SVC:
    """Support Vector Machine classifier, trained by solving the dual QP problem."""

    def __init__(self, kernel=linear_kernel, C=1.0, verbose=False):
        self.kernel = kernel
        self.C = C  # hyper-parameter
        self.sv_idx, self.sv, self.sv_y = np.zeros(0), np.zeros(0), np.zeros(0)
        self.alphas = np.zeros(0)
        self.w = None
        self.b = 0.0  # intercept
        self.verbose = verbose

    def fit(self, X, y):
        """
        Trains the model by solving a quadratic programming problem.
        :param X: array of size [n_samples, n_features] holding the training samples
        :param y: array of size [n_samples] holding the class labels
        """
        # In QP formulation (dual): m variables, 2m+1 constraints (1 equation, 2m inequations)
        self.solve_qp(X, y)
        sv = self.alphas > 1e-5
        self.sv_idx = np.arange(len(self.alphas))[sv]
        self.sv, self.sv_y, self.alphas = X[sv], y[sv], self.alphas[sv]

        if self.kernel == linear_kernel:
            self.w = np.dot(self.alphas * self.sv_y, self.sv)

        for n in range(len(self.alphas)):
            self.b += self.sv_y[n]
            self.b -= np.sum(self.alphas * self.sv_y * self.K[self.sv_idx[n], sv])
        self.b /= len(self.alphas)
        return self

    def solve_qp(self, X, y):
        """
        Solves a quadratic programming problem. In QP formulation (dual):
        m variables, 2m+1 constraints (1 equation, 2m inequations).
        :param X: array of size [n_samples, n_features] holding the training samples
        :param y: array of size [n_samples] holding the class labels
        """
        m = len(y)  # m = n_samples
        self.K = self.kernel(X)  # gram matrix
        P = self.K * np.outer(y, y)
        q = -np.ones(m)
        lb = np.zeros(m)  # lower bounds
        ub = np.ones(m) * self.C  # upper bounds
        A = y.astype(np.float64)  # equality matrix
        b = np.zeros(1)  # equality vector
        self.alphas = solve_qp(P, q, A=A, b=b, lb=lb, ub=ub, solver='cvxopt',
                               sym_proj=True, verbose=self.verbose)

    def predict_score(self, X):
        """Predicts the decision-function score for the given examples."""
        if self.w is None:
            return np.dot(self.alphas * self.sv_y, self.kernel(self.sv, X)) + self.b
        return np.dot(X, self.w) + self.b

    def predict(self, X):
        """Predicts the class (+1 or -1) of the given examples."""
        return np.sign(self.predict_score(X))


class SVR:
    """Epsilon-insensitive Support Vector Machine regressor, trained by solving the dual QP problem."""

    def __init__(self, kernel=linear_kernel, C=1.0, epsilon=0.1, verbose=False):
        self.kernel = kernel
        self.C = C  # hyper-parameter
        self.epsilon = epsilon  # epsilon insensitive loss value
        self.sv_idx, self.sv = np.zeros(0), np.zeros(0)
        self.alphas_p, self.alphas_n = np.zeros(0), np.zeros(0)
        self.w = None
        self.b = 0.0  # intercept
        self.verbose = verbose

    def fit(self, X, y):
        """
        Trains the model by solving a quadratic programming problem.
        :param X: array of size [n_samples, n_features] holding the training samples
        :param y: array of size [n_samples] holding the target values
        """
        # In QP formulation (dual): m variables, 2m+1 constraints (1 equation, 2m inequations)
        self.solve_qp(X, y)

        sv = np.logical_or(self.alphas_p > 1e-5, self.alphas_n > 1e-5)
        self.sv_idx = np.arange(len(self.alphas_p))[sv]
        self.sv, sv_y = X[sv], y[sv]
        self.alphas_p, self.alphas_n = self.alphas_p[sv], self.alphas_n[sv]

        if self.kernel == linear_kernel:
            self.w = np.dot(self.alphas_p - self.alphas_n, self.sv)

        for n in range(len(self.alphas_p)):
            self.b += sv_y[n]
            self.b -= np.sum((self.alphas_p - self.alphas_n) * self.K[self.sv_idx[n], sv])
            self.b -= self.epsilon
        self.b /= len(self.alphas_p)

        return self

    def solve_qp(self, X, y):
        """
        Solves a quadratic programming problem. In QP formulation (dual):
        m variables, 2m+1 constraints (1 equation, 2m inequations).
        :param X: array of size [n_samples, n_features] holding the training samples
        :param y: array of size [n_samples] holding the target values
        """
        m = len(y)  # m = n_samples
        self.K = self.kernel(X)  # gram matrix
        P = np.vstack((np.hstack((self.K, -self.K)),  # alphas_p, alphas_n
                       np.hstack((-self.K, self.K))))  # alphas_n, alphas_p
        q = np.hstack((-y, y)) + self.epsilon
        lb = np.zeros(2 * m)  # lower bounds
        ub = np.ones(2 * m) * self.C  # upper bounds
        A = np.hstack((np.ones(m), -np.ones(m)))  # equality matrix
        b = np.zeros(1)  # equality vector
        alphas = solve_qp(P, q, A=A, b=b, lb=lb, ub=ub, solver='cvxopt',
                          sym_proj=True, verbose=self.verbose)
        self.alphas_p = alphas[:m]
        self.alphas_n = alphas[m:]

    def predict(self, X):
        if self.kernel != linear_kernel:
            return np.dot(self.alphas_p - self.alphas_n, self.kernel(self.sv, X)) + self.b
        return np.dot(X, self.w) + self.b


class MultiClassLearner:
    """One-vs-rest ('ovr') or one-vs-one ('ovo') multi-class wrapper around a binary classifier."""

    def __init__(self, clf, decision_function='ovr'):
        self.clf = clf
        self.decision_function = decision_function
        self.n_class, self.classifiers = 0, []

    def fit(self, X, y):
        """
        Trains n_class or n_class * (n_class - 1) / 2 classifiers
        according to the training method, ovr or ovo respectively.
        :param X: array of size [n_samples, n_features] holding the training samples
        :param y: array of size [n_samples] holding the class labels
        :return: self, with the trained classifiers stored in self.classifiers
        """
        labels = np.unique(y)
        self.n_class = len(labels)
        if self.decision_function == 'ovr':  # one-vs-rest method
            for label in labels:
                y1 = np.array(y)
                y1[y1 != label] = -1.0
                y1[y1 == label] = 1.0
                self.clf.fit(X, y1)
                self.classifiers.append(copy.deepcopy(self.clf))
        elif self.decision_function == 'ovo':  # use one-vs-one method
            n_labels = len(labels)
            for i in range(n_labels):
                for j in range(i + 1, n_labels):
                    neg_id, pos_id = y == labels[i], y == labels[j]
                    X1, y1 = np.r_[X[neg_id], X[pos_id]], np.r_[y[neg_id], y[pos_id]]
                    y1[y1 == labels[i]] = -1.0
                    y1[y1 == labels[j]] = 1.0
                    self.clf.fit(X1, y1)
                    self.classifiers.append(copy.deepcopy(self.clf))
        else:
            raise ValueError("Decision function must be either 'ovr' or 'ovo'.")
        return self

    def predict(self, X):
        """
        Predicts the class of the given examples according to the training method.
        """
        n_samples = len(X)
        if self.decision_function == 'ovr':  # one-vs-rest method
            assert len(self.classifiers) == self.n_class
            score = np.zeros((n_samples, self.n_class))
            for i in range(self.n_class):
                clf = self.classifiers[i]
                score[:, i] = clf.predict_score(X)
            return np.argmax(score, axis=1)
        elif self.decision_function == 'ovo':  # use one-vs-one method
            assert len(self.classifiers) == self.n_class * (self.n_class - 1) / 2
            vote = np.zeros((n_samples, self.n_class))
            clf_id = 0
            for i in range(self.n_class):
                for j in range(i + 1, self.n_class):
                    res = self.classifiers[clf_id].predict(X)
                    vote[res < 0, i] += 1.0  # negative sample: class i
                    vote[res > 0, j] += 1.0  # positive sample: class j
                    clf_id += 1
            return np.argmax(vote, axis=1)
        else:
            raise ValueError("Decision function must be either 'ovr' or 'ovo'.")


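# Hedged usage sketch: SVC/SVR/MultiClassLearner work on numpy arrays rather
# than DataSet objects, so unpack the examples first (illustrative; requires
# the cvxopt solver backend for qpsolvers):
#
#     iris.classes_to_numbers()
#     X = np.array([e[:iris.target] for e in iris.examples], dtype=float)
#     y = np.array([e[iris.target] for e in iris.examples], dtype=float)
#     clf = MultiClassLearner(SVC(kernel=linear_kernel, C=1.0)).fit(X, y)
#     clf.predict(X[:5])   # -> array of predicted class indices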
def EnsembleLearner(learners):
    """Given a list of learning algorithms, have them vote."""

    def train(dataset):
        predictors = [learner(dataset) for learner in learners]

        def predict(example):
            return mode(predictor(example) for predictor in predictors)

        return predict

    return train


def ada_boost(dataset, L, K):
    """[Figure 18.34]"""

    examples, target = dataset.examples, dataset.target
    n = len(examples)
    eps = 1 / (2 * n)
    w = [1 / n] * n
    h, z = [], []
    for k in range(K):
        h_k = L(dataset, w)
        h.append(h_k)
        error = sum(weight for example, weight in zip(examples, w) if example[target] != h_k(example))
        # avoid divide-by-0 from either 0% or 100% error rates
        error = np.clip(error, eps, 1 - eps)
        for j, example in enumerate(examples):
            if example[target] == h_k(example):
                w[j] *= error / (1 - error)
        w = normalize(w)
        z.append(np.log((1 - error) / error))
    return weighted_majority(h, z)


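# Hedged usage sketch: ada_boost needs a learner that accepts example weights;
# WeightedLearner (defined below) adapts an unweighted learner for this:
#
#     boosted = ada_boost(restaurant, WeightedLearner(DecisionTreeLearner), K=5)
#     err_ratio(boosted, restaurant)   # training error of the boosted ensemble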
def weighted_majority(predictors, weights):
    """Return a predictor that takes a weighted vote."""

    def predict(example):
        return weighted_mode((predictor(example) for predictor in predictors), weights)

    return predict


def weighted_mode(values, weights):
    """
    Return the value with the greatest total weight.
    >>> weighted_mode('abbaa', [1, 2, 3, 1, 2])
    'b'
    """
    totals = defaultdict(int)
    for v, w in zip(values, weights):
        totals[v] += w
    return max(totals, key=totals.__getitem__)


def RandomForest(dataset, n=5):
    """An ensemble of Decision Trees trained using bagging and feature bagging."""

    def data_bagging(dataset, m=0):
        """Sample m examples with replacement"""
        n = len(dataset.examples)
        return weighted_sample_with_replacement(m or n, dataset.examples, [1] * n)

    def feature_bagging(dataset, p=0.7):
        """Feature bagging with probability p to retain an attribute"""
        inputs = [i for i in dataset.inputs if probability(p)]
        return inputs or dataset.inputs

    def predict(example):
        return mode(predictor(example) for predictor in predictors)

    predictors = [DecisionTreeLearner(DataSet(examples=data_bagging(dataset), attrs=dataset.attrs,
                                              attr_names=dataset.attr_names, target=dataset.target,
                                              inputs=feature_bagging(dataset))) for _ in range(n)]

    return predict


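# Hedged usage sketch (restaurant is built near the end of this module):
#
#     forest = RandomForest(restaurant, n=5)
#     forest(restaurant.examples[0])   # majority vote of the five bagged trees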
def WeightedLearner(unweighted_learner):
    """
    [Page 749 footnote 14]
    Given a learner that takes just an unweighted dataset, return
    one that takes also a weight for each example.
    """

    def train(dataset, weights):
        return unweighted_learner(replicated_dataset(dataset, weights))

    return train


def replicated_dataset(dataset, weights, n=None):
    """Copy dataset, replicating each example in proportion to its weight."""
    n = n or len(dataset.examples)
    result = copy.copy(dataset)
    result.examples = weighted_replicate(dataset.examples, weights, n)
    return result


def weighted_replicate(seq, weights, n):
    """
    Return n selections from seq, with the count of each element of
    seq proportional to the corresponding weight (filling in fractions
    randomly).
    >>> weighted_replicate('ABC', [1, 2, 1], 4)
    ['A', 'B', 'B', 'C']
    """
    assert len(seq) == len(weights)
    weights = normalize(weights)
    wholes = [int(w * n) for w in weights]
    fractions = [(w * n) % 1 for w in weights]
    return (flatten([x] * nx for x, nx in zip(seq, wholes)) +
            weighted_sample_with_replacement(n - sum(wholes), seq, fractions))


# metrics

def accuracy_score(y_pred, y_true):
    assert y_pred.shape == y_true.shape
    return np.mean(np.equal(y_pred, y_true))


def r2_score(y_pred, y_true):
    assert y_pred.shape == y_true.shape
    return 1. - (np.sum(np.square(y_pred - y_true)) /  # sum of squared residuals
                 np.sum(np.square(y_true - np.mean(y_true))))  # total sum of squares


# datasets

orings = DataSet(name='orings', target='Distressed', attr_names='Rings Distressed Temp Pressure Flightnum')

zoo = DataSet(name='zoo', target='type', exclude=['name'],
              attr_names='name hair feathers eggs milk airborne aquatic predator toothed backbone '
                         'breathes venomous fins legs tail domestic catsize type')

iris = DataSet(name='iris', target='class', attr_names='sepal-len sepal-width petal-len petal-width class')


def RestaurantDataSet(examples=None):
    """
    [Figure 18.3]
    Build a DataSet of Restaurant waiting examples.
    """
    return DataSet(name='restaurant', target='Wait', examples=examples,
                   attr_names='Alternate Bar Fri/Sat Hungry Patrons Price Raining Reservation Type WaitEstimate Wait')


restaurant = RestaurantDataSet()


def T(attr_name, branches):
    """Build a DecisionFork for the restaurant domain from a dict of branches."""
    branches = {value: (child if isinstance(child, DecisionFork) else DecisionLeaf(child))
                for value, child in branches.items()}
    return DecisionFork(restaurant.attr_num(attr_name), attr_name, print, branches)


"""
1166
[Figure 18.2]
1167
A decision tree for deciding whether to wait for a table at a hotel.
1168
"""
1169
1170
waiting_decision_tree = T('Patrons',
1171
{'None': 'No', 'Some': 'Yes',
1172
'Full': T('WaitEstimate',
1173
{'>60': 'No', '0-10': 'Yes',
1174
'30-60': T('Alternate',
1175
{'No': T('Reservation',
1176
{'Yes': 'Yes',
1177
'No': T('Bar', {'No': 'No',
1178
'Yes': 'Yes'})}),
1179
'Yes': T('Fri/Sat', {'No': 'No', 'Yes': 'Yes'})}),
1180
'10-30': T('Hungry',
1181
{'No': 'Yes',
1182
'Yes': T('Alternate',
1183
{'No': 'Yes',
1184
'Yes': T('Raining',
1185
{'No': 'No',
1186
'Yes': 'Yes'})})})})})
1187
1188
1189
def SyntheticRestaurant(n=20):
    """Generate a DataSet with n examples."""

    def gen():
        example = list(map(random.choice, restaurant.values))
        example[restaurant.target] = waiting_decision_tree(example)
        return example

    return RestaurantDataSet([gen() for _ in range(n)])


def Majority(k, n):
    """
    Return a DataSet with n k-bit examples of the majority problem:
    k random bits followed by a 1 if more than half the bits are 1, else 0.
    """
    examples = []
    for i in range(n):
        bits = [random.choice([0, 1]) for _ in range(k)]
        bits.append(int(sum(bits) > k / 2))
        examples.append(bits)
    return DataSet(name='majority', examples=examples)


def Parity(k, n, name='parity'):
    """
    Return a DataSet with n k-bit examples of the parity problem:
    k random bits followed by a 1 if an odd number of bits are 1, else 0.
    """
    examples = []
    for i in range(n):
        bits = [random.choice([0, 1]) for _ in range(k)]
        bits.append(sum(bits) % 2)
        examples.append(bits)
    return DataSet(name=name, examples=examples)


def Xor(n):
    """Return a DataSet with n examples of 2-input xor."""
    return Parity(2, n, name='xor')


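# Hedged usage sketch: the generated datasets above make quick smoke tests for
# the learners (sizes and hyper-parameters are illustrative):
#
#     majority = Majority(7, 100)
#     h = PerceptronLearner(majority, learning_rate=0.05, epochs=100)
#     h(majority.examples[0])   # -> 0 or 1, the index of the most active output node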
def ContinuousXor(n):
    """2 inputs are chosen uniformly from (0.0 .. 2.0]; output is xor of ints."""
    examples = []
    for i in range(n):
        x, y = [random.uniform(0.0, 2.0) for _ in '12']
        examples.append([x, y, int(x) != int(y)])
    return DataSet(name='continuous xor', examples=examples)


def compare(algorithms=None, datasets=None, k=10, trials=1):
    """
    Compare various learners on various datasets using cross-validation.
    Print results as a table.
    """
    # default list of algorithms
    algorithms = algorithms or [PluralityLearner, NaiveBayesLearner, NearestNeighborLearner, DecisionTreeLearner]

    # default list of datasets
    datasets = datasets or [iris, orings, zoo, restaurant, SyntheticRestaurant(20),
                            Majority(7, 100), Parity(7, 100), Xor(100)]

    print_table([[a.__name__.replace('Learner', '')] + [cross_validation(a, d, k=k, trials=trials) for d in datasets]
                 for a in algorithms], header=[''] + [d.name[0:7] for d in datasets], numfmt='%.2f')