# coding: utf-8


import sys
from python_environment_check import check_packages
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from scipy.special import expit
# The image-display call near the end of this script uses IPythonImage,
# so it needs this import (it was missing here):
from IPython.display import Image as IPythonImage

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:


sys.path.insert(0, '..')


# Check recommended package versions:


d = {
    'numpy': '1.21.2',
    'scipy': '1.7.0',
    'matplotlib': '3.4.3',
    'torch': '1.9.0',
}
check_packages(d)

# # Chapter 12: Parallelizing Neural Network Training with PyTorch (Part 2/2)
#

# - [Building an NN model in PyTorch](#Building-an-NN-model-in-PyTorch)
#   - [The PyTorch neural network module (torch.nn)](#The-PyTorch-neural-network-module-(torch.nn))
#   - [Building a linear regression model](#Building-a-linear-regression-model)
#   - [Model training via the torch.nn and torch.optim modules](#Model-training-via-the-torch.nn-and-torch.optim-modules)
#   - [Building a multilayer perceptron for classifying flowers in the Iris dataset](#Building-a-multilayer-perceptron-for-classifying-flowers-in-the-Iris-dataset)
#   - [Evaluating the trained model on the test dataset](#Evaluating-the-trained-model-on-the-test-dataset)
#   - [Saving and reloading the trained model](#Saving-and-reloading-the-trained-model)
# - [Choosing activation functions for multilayer neural networks](#Choosing-activation-functions-for-multilayer-neural-networks)
#   - [Logistic function recap](#Logistic-function-recap)
#   - [Estimating class probabilities in multiclass classification via the softmax function](#Estimating-class-probabilities-in-multiclass-classification-via-the-softmax-function)
#   - [Broadening the output spectrum using a hyperbolic tangent](#Broadening-the-output-spectrum-using-a-hyperbolic-tangent)
#   - [Rectified linear unit activation](#Rectified-linear-unit-activation)
# - [Summary](#Summary)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ## Building a neural network model in PyTorch

# ### The PyTorch neural network module (torch.nn)

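# Aside (my sketch, not part of the original script): besides subclassing
# nn.Module, torch.nn also offers nn.Sequential for simple feed-forward
# stacks. The two-layer architecture built as a custom nn.Module later in
# this chapter could equivalently be declared as:

seq_model_demo = nn.Sequential(
    nn.Linear(4, 16),   # input -> hidden
    nn.Sigmoid(),       # hidden activation
    nn.Linear(16, 3),   # hidden -> output
)
print(seq_model_demo)
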
# ### Building a linear regression model


X_train = np.arange(10, dtype='float32').reshape((10, 1))
y_train = np.array([1.0, 1.3, 3.1, 2.0, 5.0, 6.3, 6.6,
                    7.4, 8.0, 9.0], dtype='float32')

plt.plot(X_train, y_train, 'o', markersize=10)
plt.xlabel('x')
plt.ylabel('y')

#plt.savefig('figures/12_07.pdf')
plt.show()


X_train_norm = (X_train - np.mean(X_train)) / np.std(X_train)
X_train_norm = torch.from_numpy(X_train_norm)

# On some computers the explicit cast to .float() is
# necessary
y_train = torch.from_numpy(y_train).float()

train_ds = TensorDataset(X_train_norm, y_train)

batch_size = 1
train_dl = DataLoader(train_ds, batch_size, shuffle=True)


torch.manual_seed(1)
weight = torch.randn(1)
weight.requires_grad_()
bias = torch.zeros(1, requires_grad=True)

def loss_fn(input, target):
    return (input-target).pow(2).mean()

def model(xb):
    return xb @ weight + bias

learning_rate = 0.001
num_epochs = 200
log_epochs = 10

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        pred = model(x_batch)
        loss = loss_fn(pred, y_batch)
        loss.backward()

        with torch.no_grad():
            weight -= weight.grad * learning_rate
            bias -= bias.grad * learning_rate
            weight.grad.zero_()
            bias.grad.zero_()

    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss {loss.item():.4f}')


print('Final Parameters:', weight.item(), bias.item())
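
# Sanity check (my addition, not in the book's code): the closed-form
# least-squares fit on the normalized inputs should land close to the
# parameters learned by gradient descent above.
slope, intercept = np.polyfit(X_train_norm.numpy().ravel(),
                              y_train.numpy(), deg=1)
print('Closed-form least squares:', slope, intercept)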

X_test = np.linspace(0, 9, num=100, dtype='float32').reshape(-1, 1)
X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)
X_test_norm = torch.from_numpy(X_test_norm)
y_pred = model(X_test_norm).detach().numpy()


fig = plt.figure(figsize=(13, 5))
ax = fig.add_subplot(1, 2, 1)
plt.plot(X_train_norm, y_train, 'o', markersize=10)
plt.plot(X_test_norm, y_pred, '--', lw=3)
plt.legend(['Training examples', 'Linear reg.'], fontsize=15)
ax.set_xlabel('x', size=15)
ax.set_ylabel('y', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

#plt.savefig('figures/12_08.pdf')

plt.show()


# ### Model training via the torch.nn and torch.optim modules


input_size = 1
output_size = 1
model = nn.Linear(input_size, output_size)

loss_fn = nn.MSELoss(reduction='mean')

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        # 1. Generate predictions
        pred = model(x_batch)[:, 0]

        # 2. Calculate loss
        loss = loss_fn(pred, y_batch)

        # 3. Compute gradients
        loss.backward()

        # 4. Update parameters using gradients
        optimizer.step()

        # 5. Reset the gradients to zero
        optimizer.zero_grad()

    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss {loss.item():.4f}')


print('Final Parameters:', model.weight.item(), model.bias.item())

X_test = np.linspace(0, 9, num=100, dtype='float32').reshape(-1, 1)
X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)
X_test_norm = torch.from_numpy(X_test_norm)
y_pred = model(X_test_norm).detach().numpy()


fig = plt.figure(figsize=(13, 5))
ax = fig.add_subplot(1, 2, 1)
plt.plot(X_train_norm, y_train, 'o', markersize=10)
plt.plot(X_test_norm, y_pred, '--', lw=3)
plt.legend(['Training examples', 'Linear reg.'], fontsize=15)
ax.set_xlabel('x', size=15)
ax.set_ylabel('y', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

#plt.savefig('ch12-linreg-2.pdf')

plt.show()


# ## Building a multilayer perceptron for classifying flowers in the Iris dataset


iris = load_iris()
X = iris['data']
y = iris['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1./3, random_state=1)


X_train_norm = (X_train - np.mean(X_train)) / np.std(X_train)
X_train_norm = torch.from_numpy(X_train_norm).float()
y_train = torch.from_numpy(y_train)

train_ds = TensorDataset(X_train_norm, y_train)

torch.manual_seed(1)
batch_size = 2
train_dl = DataLoader(train_ds, batch_size, shuffle=True)


class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = nn.Sigmoid()(x)
        x = self.layer2(x)
        x = nn.Softmax(dim=1)(x)
        return x

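# Side note (my comment, not in the original script): nn.CrossEntropyLoss,
# used below, expects raw logits and applies log-softmax internally. The
# model above nevertheless feeds it softmax outputs; it still trains, but
# the more conventional pattern is to return self.layer2(x) directly and
# apply softmax only at inference time.
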
input_size = X_train_norm.shape[1]
hidden_size = 16
output_size = 3

model = Model(input_size, hidden_size, output_size)

learning_rate = 0.001

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


num_epochs = 100
loss_hist = [0] * num_epochs
accuracy_hist = [0] * num_epochs

for epoch in range(num_epochs):

    for x_batch, y_batch in train_dl:
        pred = model(x_batch)
        loss = loss_fn(pred, y_batch.long())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_hist[epoch] += loss.item()*y_batch.size(0)
        is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
        accuracy_hist[epoch] += is_correct.sum()

    loss_hist[epoch] /= len(train_dl.dataset)
    accuracy_hist[epoch] /= len(train_dl.dataset)


fig = plt.figure(figsize=(12, 5))
ax = fig.add_subplot(1, 2, 1)
ax.plot(loss_hist, lw=3)
ax.set_title('Training loss', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

ax = fig.add_subplot(1, 2, 2)
ax.plot(accuracy_hist, lw=3)
ax.set_title('Training accuracy', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)
plt.tight_layout()

#plt.savefig('figures/12_09.pdf')

plt.show()


# ### Evaluating the trained model on the test dataset


X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)
X_test_norm = torch.from_numpy(X_test_norm).float()
y_test = torch.from_numpy(y_test)
pred_test = model(X_test_norm)

correct = (torch.argmax(pred_test, dim=1) == y_test).float()
accuracy = correct.mean()

print(f'Test Acc.: {accuracy:.4f}')


# ### Saving and reloading the trained model


path = 'iris_classifier.pt'
torch.save(model, path)


# Note: with the torch version pinned above (1.9.0) this works as-is; on
# PyTorch >= 2.6, loading a whole pickled model requires
# torch.load(path, weights_only=False).
model_new = torch.load(path)
model_new.eval()


pred_test = model_new(X_test_norm)

correct = (torch.argmax(pred_test, dim=1) == y_test).float()
accuracy = correct.mean()

print(f'Test Acc.: {accuracy:.4f}')


path = 'iris_classifier_state.pt'
torch.save(model.state_dict(), path)


model_new = Model(input_size, hidden_size, output_size)
model_new.load_state_dict(torch.load(path))
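
# Sanity check (my addition, not in the book's code): the model restored
# from the state_dict should produce exactly the same predictions as the
# original model.
model_new.eval()
with torch.no_grad():
    assert torch.allclose(model_new(X_test_norm), model(X_test_norm))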

# ## Choosing activation functions for multilayer neural networks
#

# ### Logistic function recap


X = np.array([1, 1.4, 2.5]) ## first value must be 1
w = np.array([0.4, 0.3, 0.5])

def net_input(X, w):
    return np.dot(X, w)

def logistic(z):
    return 1.0 / (1.0 + np.exp(-z))

def logistic_activation(X, w):
    z = net_input(X, w)
    return logistic(z)

print(f'P(y=1|x) = {logistic_activation(X, w):.3f}')


# W : array with shape = (n_output_units, n_hidden_units+1)
# note that the first column contains the bias units

W = np.array([[1.1, 1.2, 0.8, 0.4],
              [0.2, 0.4, 1.0, 0.2],
              [0.6, 1.5, 1.2, 0.7]])

# A : data array with shape = (n_samples, n_hidden_units + 1)
# note that the first element of each row must be 1 (the bias input)

A = np.array([[1, 0.1, 0.4, 0.6]])
Z = np.dot(W, A[0])
y_probas = logistic(Z)
print('Net Input: \n', Z)

print('Output Units:\n', y_probas)


y_class = np.argmax(Z, axis=0)
print('Predicted class label:', y_class)


# ### Estimating class probabilities in multiclass classification via the softmax function


def softmax(z):
    return np.exp(z) / np.sum(np.exp(z))

y_probas = softmax(Z)
print('Probabilities:\n', y_probas)

# The class probabilities sum to 1:
print(np.sum(y_probas))


# The same result via PyTorch:
print(torch.softmax(torch.from_numpy(Z), dim=0))

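
# Aside (my sketch, not part of the book's code): the naive softmax above
# can overflow for large net inputs; subtracting max(z) first is the
# standard numerically stable formulation and yields identical
# probabilities.
def softmax_stable(z):
    e_z = np.exp(z - np.max(z))
    return e_z / e_z.sum()

print('Stable softmax:\n', softmax_stable(Z))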

# ### Broadening the output spectrum using a hyperbolic tangent


def tanh(z):
    e_p = np.exp(z)
    e_m = np.exp(-z)
    return (e_p - e_m) / (e_p + e_m)

z = np.arange(-5, 5, 0.005)
log_act = logistic(z)
tanh_act = tanh(z)
plt.ylim([-1.5, 1.5])
plt.xlabel('Net input $z$')
plt.ylabel(r'Activation $\phi(z)$')
plt.axhline(1, color='black', linestyle=':')
plt.axhline(0.5, color='black', linestyle=':')
plt.axhline(0, color='black', linestyle=':')
plt.axhline(-0.5, color='black', linestyle=':')
plt.axhline(-1, color='black', linestyle=':')
plt.plot(z, tanh_act,
         linewidth=3, linestyle='--',
         label='Tanh')
plt.plot(z, log_act,
         linewidth=3,
         label='Logistic')
plt.legend(loc='lower right')
plt.tight_layout()

#plt.savefig('figures/12_10.pdf')
plt.show()


# NumPy's built-in tanh gives the same values:
print(np.tanh(z))


# ... and so does PyTorch:
print(torch.tanh(torch.from_numpy(z)))


# SciPy's expit is equivalent to the logistic function defined above:
print(expit(z))


# ... as is torch.sigmoid:
print(torch.sigmoid(torch.from_numpy(z)))


# ### Rectified linear unit activation


print(torch.relu(torch.from_numpy(z)))
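
# Aside (my sketch, not part of the book's code): ReLU is simply
# max(0, z), so the NumPy equivalent of the line above is:
print(np.maximum(0, z))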

# Display the ReLU figure from the book's figures folder
# (IPythonImage is imported at the top of this script):
IPythonImage(filename='figures/12_11.png', width=500)


# ## Summary

# ---
#
# Readers may ignore the next cell.