# coding: utf-8


import sys
from python_environment_check import check_packages
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from urllib.error import HTTPError  # needed for the Iris download fallback below

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:


sys.path.insert(0, '..')


# Check recommended package versions:


d = {
    'numpy': '1.21.2',
    'matplotlib': '3.4.3',
    'pandas': '1.3.2'
}
check_packages(d)

# # Chapter 2 - Training Machine Learning Algorithms for Classification

# ### Overview
#
# - [Artificial neurons – a brief glimpse into the early history of machine learning](#Artificial-neurons-a-brief-glimpse-into-the-early-history-of-machine-learning)
# - [The formal definition of an artificial neuron](#The-formal-definition-of-an-artificial-neuron)
# - [The perceptron learning rule](#The-perceptron-learning-rule)
# - [Implementing a perceptron learning algorithm in Python](#Implementing-a-perceptron-learning-algorithm-in-Python)
# - [An object-oriented perceptron API](#An-object-oriented-perceptron-API)
# - [Training a perceptron model on the Iris dataset](#Training-a-perceptron-model-on-the-Iris-dataset)
# - [Adaptive linear neurons and the convergence of learning](#Adaptive-linear-neurons-and-the-convergence-of-learning)
# - [Minimizing cost functions with gradient descent](#Minimizing-cost-functions-with-gradient-descent)
# - [Implementing an Adaptive Linear Neuron in Python](#Implementing-an-Adaptive-Linear-Neuron-in-Python)
# - [Improving gradient descent through feature scaling](#Improving-gradient-descent-through-feature-scaling)
# - [Large scale machine learning and stochastic gradient descent](#Large-scale-machine-learning-and-stochastic-gradient-descent)
# - [Summary](#Summary)


# # Artificial neurons - a brief glimpse into the early history of machine learning



# ## The formal definition of an artificial neuron



# ## The perceptron learning rule

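# A minimal, hypothetical illustration (not part of the book's code) of a single
# perceptron update, w := w + eta * (y - y_hat) * x and b := b + eta * (y - y_hat),
# on one made-up training example; the Perceptron class below applies exactly this
# rule once per example and epoch. All *_demo names are illustrative only.
eta_demo = 0.1
w_demo, b_demo = np.array([0.0, 0.0]), 0.0
xi_demo, target_demo = np.array([2.0, 3.0]), 0
y_hat_demo = np.where(np.dot(xi_demo, w_demo) + b_demo >= 0.0, 1, 0)   # predicts 1
update_demo = eta_demo * (target_demo - y_hat_demo)                    # -0.1
w_demo, b_demo = w_demo + update_demo * xi_demo, b_demo + update_demo  # w=[-0.2, -0.3], b=-0.1
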
# # Implementing a perceptron learning algorithm in Python

# ## An object-oriented perceptron API


class Perceptron:
    """Perceptron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.

    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    errors_ : list
      Number of misclassifications (updates) in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        self.b_ = np.float_(0.)

        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_ += update * xi
                self.b_ += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_) + self.b_

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, 0)


# Side note: the angle between a vector and a positively scaled copy of itself
# is zero, so arccos of their normalized dot product returns 0.0 here.
v1 = np.array([1, 2, 3])
v2 = 0.5 * v1
np.arccos(v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))


# ## Training a perceptron model on the Iris dataset

# ...

# ### Reading-in the Iris data


try:
    s = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    print('From URL:', s)
    df = pd.read_csv(s,
                     header=None,
                     encoding='utf-8')

except HTTPError:
    s = 'iris.data'
    print('From local Iris path:', s)
    df = pd.read_csv(s,
                     header=None,
                     encoding='utf-8')

df.tail()


# ### Plotting the Iris data


# select setosa and versicolor
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', 0, 1)

# extract sepal length and petal length
X = df.iloc[0:100, [0, 2]].values

# plot data
plt.scatter(X[:50, 0], X[:50, 1],
            color='red', marker='o', label='Setosa')
plt.scatter(X[50:100, 0], X[50:100, 1],
            color='blue', marker='s', label='Versicolor')

plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')

# plt.savefig('images/02_06.png', dpi=300)
plt.show()


# ### Training the perceptron model


ppn = Perceptron(eta=0.1, n_iter=10)

ppn.fit(X, y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')

# plt.savefig('images/02_07.png', dpi=300)
plt.show()


# ### A function for plotting decision regions


def plot_decision_regions(X, y, classifier, resolution=0.02):

    # setup marker generator and color map
    markers = ('o', 's', '^', 'v', '<')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    lab = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    lab = lab.reshape(xx1.shape)
    plt.contourf(xx1, xx2, lab, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class examples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=f'Class {cl}',
                    edgecolor='black')


plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')


# plt.savefig('images/02_08.png', dpi=300)
plt.show()


# # Adaptive linear neurons and the convergence of learning

# ...

# ## Minimizing cost functions with gradient descent

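# A minimal, hypothetical sketch (not from the book's code) of one full-batch
# gradient descent step on the mean squared error loss
# L(w, b) = mean((y - (Xw + b))**2); the same update appears inside
# AdalineGD.fit below. All *_demo names are illustrative only.
X_demo = np.array([[1.0, 2.0], [2.0, 0.5], [0.0, 1.0]])
y_demo = np.array([1.0, 0.0, 1.0])
w_demo, b_demo, eta_demo = np.zeros(X_demo.shape[1]), 0.0, 0.1

output_demo = X_demo.dot(w_demo) + b_demo   # linear activation
errors_demo = y_demo - output_demo          # residuals
w_demo += eta_demo * 2.0 * X_demo.T.dot(errors_demo) / X_demo.shape[0]
b_demo += eta_demo * 2.0 * errors_demo.mean()
loss_demo = (errors_demo**2).mean()         # loss before this step
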
# ## Implementing an adaptive linear neuron in Python


class AdalineGD:
    """ADAptive LInear NEuron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.


    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    losses_ : list
      Mean squared error loss function values in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """ Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        self.b_ = np.float_(0.)
        self.losses_ = []

        for i in range(self.n_iter):
            net_input = self.net_input(X)
            # Please note that the "activation" method has no effect
            # in the code since it is simply an identity function. We
            # could write `output = self.net_input(X)` directly instead.
            # The purpose of the activation is more conceptual, i.e.,
            # in the case of logistic regression (as we will see later),
            # we could change it to a sigmoid function to implement a
            # logistic regression classifier.
            output = self.activation(net_input)
            errors = (y - output)

            # for w_j in range(self.w_.shape[0]):
            #     self.w_[w_j] += self.eta * (2.0 * (X[:, w_j]*errors)).mean()

            self.w_ += self.eta * 2.0 * X.T.dot(errors) / X.shape[0]
            self.b_ += self.eta * 2.0 * errors.mean()
            loss = (errors**2).mean()
            self.losses_.append(loss)
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_) + self.b_

    def activation(self, X):
        """Compute linear activation"""
        return X

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)


fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

ada1 = AdalineGD(n_iter=15, eta=0.1).fit(X, y)
ax[0].plot(range(1, len(ada1.losses_) + 1), np.log10(ada1.losses_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Mean squared error)')
ax[0].set_title('Adaline - Learning rate 0.1')

ada2 = AdalineGD(n_iter=15, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.losses_) + 1), ada2.losses_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Mean squared error')
ax[1].set_title('Adaline - Learning rate 0.0001')

# plt.savefig('images/02_11.png', dpi=300)
plt.show()


# ## Improving gradient descent through feature scaling


# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()


ada_gd = AdalineGD(n_iter=20, eta=0.5)
ada_gd.fit(X_std, y)

plot_decision_regions(X_std, y, classifier=ada_gd)
plt.title('Adaline - Gradient descent')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
# plt.savefig('images/02_14_1.png', dpi=300)
plt.show()

plt.plot(range(1, len(ada_gd.losses_) + 1), ada_gd.losses_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Mean squared error')

plt.tight_layout()
# plt.savefig('images/02_14_2.png', dpi=300)
plt.show()


# ## Large scale machine learning and stochastic gradient descent


class AdalineSGD:
    """ADAptive LInear NEuron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    shuffle : bool (default: True)
      Shuffles training data every epoch if True to prevent cycles.
    random_state : int
      Random number generator seed for random weight
      initialization.


    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    losses_ : list
      Mean squared error loss function value averaged over all
      training examples in each epoch.


    """
    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.w_initialized = False
        self.shuffle = shuffle
        self.random_state = random_state

    def fit(self, X, y):
        """ Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        self._initialize_weights(X.shape[1])
        self.losses_ = []
        for i in range(self.n_iter):
            if self.shuffle:
                X, y = self._shuffle(X, y)
            losses = []
            for xi, target in zip(X, y):
                losses.append(self._update_weights(xi, target))
            avg_loss = np.mean(losses)
            self.losses_.append(avg_loss)
        return self

    def partial_fit(self, X, y):
        """Fit training data without reinitializing the weights"""
        if not self.w_initialized:
            self._initialize_weights(X.shape[1])
        if y.ravel().shape[0] > 1:
            for xi, target in zip(X, y):
                self._update_weights(xi, target)
        else:
            self._update_weights(X, y)
        return self

    def _shuffle(self, X, y):
        """Shuffle training data"""
        r = self.rgen.permutation(len(y))
        return X[r], y[r]

    def _initialize_weights(self, m):
        """Initialize weights to small random numbers"""
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=m)
        self.b_ = np.float_(0.)
        self.w_initialized = True

    def _update_weights(self, xi, target):
        """Apply Adaline learning rule to update the weights"""
        output = self.activation(self.net_input(xi))
        error = (target - output)
        self.w_ += self.eta * 2.0 * xi * error
        self.b_ += self.eta * 2.0 * error
        loss = error**2
        return loss

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_) + self.b_

    def activation(self, X):
        """Compute linear activation"""
        return X

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)


ada_sgd = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
ada_sgd.fit(X_std, y)

plot_decision_regions(X_std, y, classifier=ada_sgd)
plt.title('Adaline - Stochastic gradient descent')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
# plt.savefig('figures/02_15_1.png', dpi=300)
plt.show()

plt.plot(range(1, len(ada_sgd.losses_) + 1), ada_sgd.losses_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average loss')

# plt.savefig('figures/02_15_2.png', dpi=300)
plt.show()


# Online/streaming update: refine the fitted model with one additional example
# without reinitializing the learned weights and bias.
ada_sgd.partial_fit(X_std[0, :], y[0])

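# Hypothetical follow-up (not from the book's code): partial_fit also accepts a
# small batch of new examples, e.g. as data arrives in a stream; the slice used
# here is purely illustrative.
ada_sgd.partial_fit(X_std[:3, :], y[:3])
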
# # Summary

# ...

# ---
#
# Readers may ignore the following cell