# coding: utf-8


import sys
from python_environment_check import check_packages
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:

sys.path.insert(0, '..')


# Check recommended package versions:

d = {
    'numpy': '1.21.2',
    'matplotlib': '3.4.3',
    'sklearn': '1.0',
}
check_packages(d)


# # Chapter 11 - Implementing a Multi-layer Artificial Neural Network from Scratch
#

# ### Overview

# - [Modeling complex functions with artificial neural networks](#Modeling-complex-functions-with-artificial-neural-networks)
#   - [Single-layer neural network recap](#Single-layer-neural-network-recap)
#   - [Introducing the multi-layer neural network architecture](#Introducing-the-multi-layer-neural-network-architecture)
#   - [Activating a neural network via forward propagation](#Activating-a-neural-network-via-forward-propagation)
# - [Classifying handwritten digits](#Classifying-handwritten-digits)
#   - [Obtaining the MNIST dataset](#Obtaining-the-MNIST-dataset)
#   - [Implementing a multi-layer perceptron](#Implementing-a-multi-layer-perceptron)
#   - [Coding the neural network training loop](#Coding-the-neural-network-training-loop)
#   - [Evaluating the neural network performance](#Evaluating-the-neural-network-performance)
# - [Training an artificial neural network](#Training-an-artificial-neural-network)
#   - [Computing the loss function](#Computing-the-loss-function)
#   - [Developing your intuition for backpropagation](#Developing-your-intuition-for-backpropagation)
#   - [Training neural networks via backpropagation](#Training-neural-networks-via-backpropagation)
# - [Convergence in neural networks](#Convergence-in-neural-networks)
# - [Summary](#Summary)


# # Modeling complex functions with artificial neural networks

# ...

# ## Single-layer neural network recap


# ## Introducing the multi-layer neural network architecture


# ## Activating a neural network via forward propagation

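# A compact summary of the forward pass implemented by `NeuralNetMLP.forward`
# further below (the notation here is chosen for illustration and is not quoted
# from the book's text):
#
# $z^{(h)} = x\,W^{(h)\top} + b^{(h)}, \qquad a^{(h)} = \sigma(z^{(h)})$
#
# $z^{(out)} = a^{(h)}\,W^{(out)\top} + b^{(out)}, \qquad a^{(out)} = \sigma(z^{(out)})$
#
# where $\sigma(z) = 1/(1 + e^{-z})$ is the logistic sigmoid activation function.

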
# # Classifying handwritten digits

# ...

# ## Obtaining and preparing the MNIST dataset

# The MNIST dataset is publicly available at http://yann.lecun.com/exdb/mnist/ and consists of the following four parts:
#
# - Training set images: train-images-idx3-ubyte.gz (9.9 MB, 47 MB unzipped, 60,000 examples)
# - Training set labels: train-labels-idx1-ubyte.gz (29 KB, 60 KB unzipped, 60,000 labels)
# - Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB unzipped, 10,000 examples)
# - Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB unzipped, 10,000 labels)
#
# A sketch of parsing these raw files directly follows below; in this chapter, however, the dataset is loaded from OpenML via scikit-learn.

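# Side note (not part of the chapter's code): the raw IDX files listed above
# could be parsed with NumPy roughly as sketched here, assuming they have been
# downloaded and gunzipped locally. The chapter instead fetches the prepared
# 'mnist_784' dataset from OpenML below.

import struct

def load_idx_images(path):
    with open(path, 'rb') as f:
        # header: magic number, number of images, rows, columns (big-endian int32)
        _, n, rows, cols = struct.unpack('>IIII', f.read(16))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(n, rows * cols)

def load_idx_labels(path):
    with open(path, 'rb') as f:
        # header: magic number, number of labels (big-endian int32)
        _, n = struct.unpack('>II', f.read(8))
        return np.frombuffer(f.read(), dtype=np.uint8)

# Hypothetical usage with local file names:
# X_raw = load_idx_images('train-images-idx3-ubyte')
# y_raw = load_idx_labels('train-labels-idx1-ubyte')

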
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
X = X.values
y = y.astype(int).values

print(X.shape)
print(y.shape)


# Normalize to [-1, 1] range:

X = ((X / 255.) - .5) * 2


# Visualize the first digit of each class:

fig, ax = plt.subplots(nrows=2, ncols=5, sharex=True, sharey=True)
ax = ax.flatten()
for i in range(10):
    img = X[y == i][0].reshape(28, 28)
    ax[i].imshow(img, cmap='Greys')

ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
#plt.savefig('figures/11_4.png', dpi=300)
plt.show()


# Visualize 25 different versions of "7":

fig, ax = plt.subplots(nrows=5, ncols=5, sharex=True, sharey=True)
ax = ax.flatten()
for i in range(25):
    img = X[y == 7][i].reshape(28, 28)
    ax[i].imshow(img, cmap='Greys')

ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
# plt.savefig('figures/11_5.png', dpi=300)
plt.show()


# Split into training, validation, and test set:

X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=10000, random_state=123, stratify=y)

X_train, X_valid, y_train, y_valid = train_test_split(
    X_temp, y_temp, test_size=5000, random_state=123, stratify=y_temp)

# optional: free up some memory by deleting the arrays that are no longer used
del X_temp, y_temp, X, y


# ## Implementing a multi-layer perceptron


##########################
### MODEL
##########################

def sigmoid(z):
    return 1. / (1. + np.exp(-z))


def int_to_onehot(y, num_labels):

    ary = np.zeros((y.shape[0], num_labels))
    for i, val in enumerate(y):
        ary[i, val] = 1

    return ary


class NeuralNetMLP:

    def __init__(self, num_features, num_hidden, num_classes, random_seed=123):
        super().__init__()

        self.num_classes = num_classes

        # hidden
        rng = np.random.RandomState(random_seed)

        self.weight_h = rng.normal(
            loc=0.0, scale=0.1, size=(num_hidden, num_features))
        self.bias_h = np.zeros(num_hidden)

        # output
        self.weight_out = rng.normal(
            loc=0.0, scale=0.1, size=(num_classes, num_hidden))
        self.bias_out = np.zeros(num_classes)

    def forward(self, x):
        # Hidden layer
        # input dim: [n_examples, n_features] dot [n_hidden, n_features].T
        # output dim: [n_examples, n_hidden]
        z_h = np.dot(x, self.weight_h.T) + self.bias_h
        a_h = sigmoid(z_h)

        # Output layer
        # input dim: [n_examples, n_hidden] dot [n_classes, n_hidden].T
        # output dim: [n_examples, n_classes]
        z_out = np.dot(a_h, self.weight_out.T) + self.bias_out
        a_out = sigmoid(z_out)
        return a_h, a_out

    def backward(self, x, a_h, a_out, y):

        #########################
        ### Output layer weights
        #########################

        # onehot encoding
        y_onehot = int_to_onehot(y, self.num_classes)

        # Part 1: dLoss/dOutWeights
        ## = dLoss/dOutAct * dOutAct/dOutNet * dOutNet/dOutWeight
        ## where DeltaOut = dLoss/dOutAct * dOutAct/dOutNet
        ## for convenient re-use

        # input/output dim: [n_examples, n_classes]
        d_loss__d_a_out = 2.*(a_out - y_onehot) / y.shape[0]

        # input/output dim: [n_examples, n_classes]
        d_a_out__d_z_out = a_out * (1. - a_out)  # sigmoid derivative

        # output dim: [n_examples, n_classes]
        delta_out = d_loss__d_a_out * d_a_out__d_z_out  # "delta (rule) placeholder"

        # gradient for output weights

        # [n_examples, n_hidden]
        d_z_out__dw_out = a_h

        # input dim: [n_classes, n_examples] dot [n_examples, n_hidden]
        # output dim: [n_classes, n_hidden]
        d_loss__dw_out = np.dot(delta_out.T, d_z_out__dw_out)
        d_loss__db_out = np.sum(delta_out, axis=0)

        #################################
        # Part 2: dLoss/dHiddenWeights
        ## = DeltaOut * dOutNet/dHiddenAct * dHiddenAct/dHiddenNet * dHiddenNet/dWeight

        # [n_classes, n_hidden]
        d_z_out__a_h = self.weight_out

        # output dim: [n_examples, n_hidden]
        d_loss__a_h = np.dot(delta_out, d_z_out__a_h)

        # [n_examples, n_hidden]
        d_a_h__d_z_h = a_h * (1. - a_h)  # sigmoid derivative

        # [n_examples, n_features]
        d_z_h__d_w_h = x

        # output dim: [n_hidden, n_features]
        d_loss__d_w_h = np.dot((d_loss__a_h * d_a_h__d_z_h).T, d_z_h__d_w_h)
        d_loss__d_b_h = np.sum((d_loss__a_h * d_a_h__d_z_h), axis=0)

        return (d_loss__dw_out, d_loss__db_out,
                d_loss__d_w_h, d_loss__d_b_h)


model = NeuralNetMLP(num_features=28*28,
                     num_hidden=50,
                     num_classes=10)


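# Quick sanity check (an addition, not from the book): a forward pass on a
# small all-zeros batch should yield hidden activations of shape
# (batch_size, num_hidden) and class-membership "probabilities" of shape
# (batch_size, num_classes).

_a_h_check, _a_out_check = model.forward(np.zeros((5, 28*28)))
print(_a_h_check.shape)    # expected: (5, 50)
print(_a_out_check.shape)  # expected: (5, 10)

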
# ## Coding the neural network training loop

# Defining data loaders:

num_epochs = 50
minibatch_size = 100


def minibatch_generator(X, y, minibatch_size):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)

    for start_idx in range(0, indices.shape[0] - minibatch_size
                           + 1, minibatch_size):
        batch_idx = indices[start_idx:start_idx + minibatch_size]

        yield X[batch_idx], y[batch_idx]


# iterate over training epochs
for i in range(num_epochs):

    # iterate over minibatches
    minibatch_gen = minibatch_generator(
        X_train, y_train, minibatch_size)

    for X_train_mini, y_train_mini in minibatch_gen:
        break

    break

print(X_train_mini.shape)
print(y_train_mini.shape)


# Defining a function to compute the loss and accuracy

def mse_loss(targets, probas, num_labels=10):
    onehot_targets = int_to_onehot(targets, num_labels=num_labels)
    return np.mean((onehot_targets - probas)**2)


def accuracy(targets, predicted_labels):
    return np.mean(predicted_labels == targets)


_, probas = model.forward(X_valid)
mse = mse_loss(y_valid, probas)

predicted_labels = np.argmax(probas, axis=1)
acc = accuracy(y_valid, predicted_labels)

print(f'Initial validation MSE: {mse:.1f}')
print(f'Initial validation accuracy: {acc*100:.1f}%')


def compute_mse_and_acc(nnet, X, y, num_labels=10, minibatch_size=100):
    mse, correct_pred, num_examples = 0., 0, 0
    minibatch_gen = minibatch_generator(X, y, minibatch_size)

    for i, (features, targets) in enumerate(minibatch_gen):

        _, probas = nnet.forward(features)
        predicted_labels = np.argmax(probas, axis=1)

        onehot_targets = int_to_onehot(targets, num_labels=num_labels)
        loss = np.mean((onehot_targets - probas)**2)
        correct_pred += (predicted_labels == targets).sum()

        num_examples += targets.shape[0]
        mse += loss

    mse = mse/(i+1)
    acc = correct_pred/num_examples
    return mse, acc


mse, acc = compute_mse_and_acc(model, X_valid, y_valid)
print(f'Initial valid MSE: {mse:.1f}')
print(f'Initial valid accuracy: {acc*100:.1f}%')


def train(model, X_train, y_train, X_valid, y_valid, num_epochs,
          learning_rate=0.1):

    epoch_loss = []
    epoch_train_acc = []
    epoch_valid_acc = []

    for e in range(num_epochs):

        # iterate over minibatches
        minibatch_gen = minibatch_generator(
            X_train, y_train, minibatch_size)

        for X_train_mini, y_train_mini in minibatch_gen:

            #### Compute outputs ####
            a_h, a_out = model.forward(X_train_mini)

            #### Compute gradients ####
            d_loss__d_w_out, d_loss__d_b_out, d_loss__d_w_h, d_loss__d_b_h = \
                model.backward(X_train_mini, a_h, a_out, y_train_mini)

            #### Update weights ####
            model.weight_h -= learning_rate * d_loss__d_w_h
            model.bias_h -= learning_rate * d_loss__d_b_h
            model.weight_out -= learning_rate * d_loss__d_w_out
            model.bias_out -= learning_rate * d_loss__d_b_out

        #### Epoch Logging ####
        train_mse, train_acc = compute_mse_and_acc(model, X_train, y_train)
        valid_mse, valid_acc = compute_mse_and_acc(model, X_valid, y_valid)
        train_acc, valid_acc = train_acc*100, valid_acc*100
        epoch_train_acc.append(train_acc)
        epoch_valid_acc.append(valid_acc)
        epoch_loss.append(train_mse)
        print(f'Epoch: {e+1:03d}/{num_epochs:03d} '
              f'| Train MSE: {train_mse:.2f} '
              f'| Train Acc: {train_acc:.2f}% '
              f'| Valid Acc: {valid_acc:.2f}%')

    return epoch_loss, epoch_train_acc, epoch_valid_acc


np.random.seed(123)  # for the training set shuffling

epoch_loss, epoch_train_acc, epoch_valid_acc = train(
    model, X_train, y_train, X_valid, y_valid,
    num_epochs=50, learning_rate=0.1)


# ## Evaluating the neural network performance

plt.plot(range(len(epoch_loss)), epoch_loss)
plt.ylabel('Mean squared error')
plt.xlabel('Epoch')
#plt.savefig('figures/11_07.png', dpi=300)
plt.show()


plt.plot(range(len(epoch_train_acc)), epoch_train_acc,
         label='Training')
plt.plot(range(len(epoch_valid_acc)), epoch_valid_acc,
         label='Validation')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(loc='lower right')
#plt.savefig('figures/11_08.png', dpi=300)
plt.show()


test_mse, test_acc = compute_mse_and_acc(model, X_test, y_test)
print(f'Test accuracy: {test_acc*100:.2f}%')


# Plot failure cases:

X_test_subset = X_test[:1000, :]
y_test_subset = y_test[:1000]

_, probas = model.forward(X_test_subset)
test_pred = np.argmax(probas, axis=1)

misclassified_images = X_test_subset[y_test_subset != test_pred][:25]
misclassified_labels = test_pred[y_test_subset != test_pred][:25]
correct_labels = y_test_subset[y_test_subset != test_pred][:25]


fig, ax = plt.subplots(nrows=5, ncols=5,
                       sharex=True, sharey=True, figsize=(8, 8))
ax = ax.flatten()
for i in range(25):
    img = misclassified_images[i].reshape(28, 28)
    ax[i].imshow(img, cmap='Greys', interpolation='nearest')
    ax[i].set_title(f'{i+1}) '
                    f'True: {correct_labels[i]}\n'
                    f' Predicted: {misclassified_labels[i]}')

ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
#plt.savefig('figures/11_09.png', dpi=300)
plt.show()


# # Training an artificial neural network

# ...

# ## Computing the loss function

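# For reference, the quantity that `mse_loss` and `compute_mse_and_acc` above
# evaluate numerically is the mean squared error between the one-hot encoded
# targets and the output activations (notation chosen here for illustration):
#
# $L = \frac{1}{n}\sum_{i=1}^{n} \frac{1}{t}\sum_{j=1}^{t} \left(y_j^{[i]} - a_j^{(out)[i]}\right)^2$
#
# where $n$ is the number of examples and $t$ the number of class labels (here, 10).

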
# ## Developing your intuition for backpropagation

# ...


# ## Training neural networks via backpropagation

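# The `backward` method above applies the chain rule as summarized here (a
# compact sketch in the same illustrative notation as the forward pass):
#
# $\delta^{(out)} = \frac{\partial L}{\partial a^{(out)}} \odot \sigma'(z^{(out)}), \qquad
# \frac{\partial L}{\partial W^{(out)}} = \delta^{(out)\top} a^{(h)}, \qquad
# \frac{\partial L}{\partial b^{(out)}} = \textstyle\sum_i \delta^{(out)[i]}$
#
# $\delta^{(h)} = \big(\delta^{(out)} W^{(out)}\big) \odot \sigma'(z^{(h)}), \qquad
# \frac{\partial L}{\partial W^{(h)}} = \delta^{(h)\top} x, \qquad
# \frac{\partial L}{\partial b^{(h)}} = \textstyle\sum_i \delta^{(h)[i]}$
#
# with $\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$, which appears in the code as
# `a_out * (1. - a_out)` and `a_h * (1. - a_h)`.

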
# # Convergence in neural networks

# ...


# # Summary

# ...

# ---
#
# Readers may ignore the next cell.