# coding: utf-8


import sys
from python_environment_check import check_packages
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from scipy.special import expit
# The image-display call near the end of this script uses IPythonImage,
# so it needs this import (it was missing here):
from IPython.display import Image as IPythonImage

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:


sys.path.insert(0, '..')


# Check recommended package versions:


d = {
    'numpy': '1.21.2',
    'scipy': '1.7.0',
    'matplotlib': '3.4.3',
    'torch': '1.9.0',
}
check_packages(d)

# # Chapter 12: Parallelizing Neural Network Training with PyTorch (Part 2/2)
#

# - [Building an NN model in PyTorch](#Building-an-NN-model-in-PyTorch)
#   - [The PyTorch neural network module (torch.nn)](#The-PyTorch-neural-network-module-(torch.nn))
#   - [Building a linear regression model](#Building-a-linear-regression-model)
#   - [Model training via the torch.nn and torch.optim modules](#Model-training-via-the-torch.nn-and-torch.optim-modules)
#   - [Building a multilayer perceptron for classifying flowers in the Iris dataset](#Building-a-multilayer-perceptron-for-classifying-flowers-in-the-Iris-dataset)
#   - [Evaluating the trained model on the test dataset](#Evaluating-the-trained-model-on-the-test-dataset)
#   - [Saving and reloading the trained model](#Saving-and-reloading-the-trained-model)
# - [Choosing activation functions for multilayer neural networks](#Choosing-activation-functions-for-multilayer-neural-networks)
#   - [Logistic function recap](#Logistic-function-recap)
#   - [Estimating class probabilities in multiclass classification via the softmax function](#Estimating-class-probabilities-in-multiclass-classification-via-the-softmax-function)
#   - [Broadening the output spectrum using a hyperbolic tangent](#Broadening-the-output-spectrum-using-a-hyperbolic-tangent)
#   - [Rectified linear unit activation](#Rectified-linear-unit-activation)
# - [Summary](#Summary)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ## Building a neural network model in PyTorch

# ### The PyTorch neural network module (torch.nn)

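# Aside (my sketch, not part of the original script): besides subclassing
# nn.Module, torch.nn also offers nn.Sequential for simple feed-forward
# stacks. The two-layer architecture built as a custom nn.Module later in
# this chapter could equivalently be declared as:

seq_model_demo = nn.Sequential(
    nn.Linear(4, 16),   # input -> hidden
    nn.Sigmoid(),       # hidden activation
    nn.Linear(16, 3),   # hidden -> output
)
print(seq_model_demo)
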
# ### Building a linear regression model


X_train = np.arange(10, dtype='float32').reshape((10, 1))
y_train = np.array([1.0, 1.3, 3.1, 2.0, 5.0, 6.3, 6.6,
                    7.4, 8.0, 9.0], dtype='float32')

plt.plot(X_train, y_train, 'o', markersize=10)
plt.xlabel('x')
plt.ylabel('y')

#plt.savefig('figures/12_07.pdf')
plt.show()


X_train_norm = (X_train - np.mean(X_train)) / np.std(X_train)
X_train_norm = torch.from_numpy(X_train_norm)

# On some computers the explicit cast to .float() is
# necessary
y_train = torch.from_numpy(y_train).float()

train_ds = TensorDataset(X_train_norm, y_train)

batch_size = 1
train_dl = DataLoader(train_ds, batch_size, shuffle=True)


torch.manual_seed(1)
weight = torch.randn(1)
weight.requires_grad_()
bias = torch.zeros(1, requires_grad=True)

def loss_fn(input, target):
    return (input-target).pow(2).mean()

def model(xb):
    return xb @ weight + bias

learning_rate = 0.001
num_epochs = 200
log_epochs = 10

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        pred = model(x_batch)
        loss = loss_fn(pred, y_batch)
        loss.backward()

        with torch.no_grad():
            weight -= weight.grad * learning_rate
            bias -= bias.grad * learning_rate
            weight.grad.zero_()
            bias.grad.zero_()

    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss {loss.item():.4f}')


print('Final Parameters:', weight.item(), bias.item())
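
# Sanity check (my addition, not in the book's code): the closed-form
# least-squares fit on the normalized inputs should land close to the
# parameters learned by gradient descent above.
slope, intercept = np.polyfit(X_train_norm.numpy().ravel(),
                              y_train.numpy(), deg=1)
print('Closed-form least squares:', slope, intercept)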

X_test = np.linspace(0, 9, num=100, dtype='float32').reshape(-1, 1)
X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)
X_test_norm = torch.from_numpy(X_test_norm)
y_pred = model(X_test_norm).detach().numpy()


fig = plt.figure(figsize=(13, 5))
ax = fig.add_subplot(1, 2, 1)
plt.plot(X_train_norm, y_train, 'o', markersize=10)
plt.plot(X_test_norm, y_pred, '--', lw=3)
plt.legend(['Training examples', 'Linear reg.'], fontsize=15)
ax.set_xlabel('x', size=15)
ax.set_ylabel('y', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

#plt.savefig('figures/12_08.pdf')

plt.show()


# ### Model training via the torch.nn and torch.optim modules


input_size = 1
output_size = 1
model = nn.Linear(input_size, output_size)

loss_fn = nn.MSELoss(reduction='mean')

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        # 1. Generate predictions
        pred = model(x_batch)[:, 0]

        # 2. Calculate loss
        loss = loss_fn(pred, y_batch)

        # 3. Compute gradients
        loss.backward()

        # 4. Update parameters using gradients
        optimizer.step()

        # 5. Reset the gradients to zero
        optimizer.zero_grad()

    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss {loss.item():.4f}')


print('Final Parameters:', model.weight.item(), model.bias.item())

X_test = np.linspace(0, 9, num=100, dtype='float32').reshape(-1, 1)
X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)
X_test_norm = torch.from_numpy(X_test_norm)
y_pred = model(X_test_norm).detach().numpy()


fig = plt.figure(figsize=(13, 5))
ax = fig.add_subplot(1, 2, 1)
plt.plot(X_train_norm, y_train, 'o', markersize=10)
plt.plot(X_test_norm, y_pred, '--', lw=3)
plt.legend(['Training examples', 'Linear reg.'], fontsize=15)
ax.set_xlabel('x', size=15)
ax.set_ylabel('y', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

#plt.savefig('ch12-linreg-2.pdf')

plt.show()


# ## Building a multilayer perceptron for classifying flowers in the Iris dataset


iris = load_iris()
X = iris['data']
y = iris['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1./3, random_state=1)


X_train_norm = (X_train - np.mean(X_train)) / np.std(X_train)
X_train_norm = torch.from_numpy(X_train_norm).float()
y_train = torch.from_numpy(y_train)

train_ds = TensorDataset(X_train_norm, y_train)

torch.manual_seed(1)
batch_size = 2
train_dl = DataLoader(train_ds, batch_size, shuffle=True)


class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = nn.Sigmoid()(x)
        x = self.layer2(x)
        x = nn.Softmax(dim=1)(x)
        return x

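# Side note (my comment, not in the original script): nn.CrossEntropyLoss,
# used below, expects raw logits and applies log-softmax internally. The
# model above nevertheless feeds it softmax outputs; it still trains, but
# the more conventional pattern is to return self.layer2(x) directly and
# apply softmax only at inference time.
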
input_size = X_train_norm.shape[1]
hidden_size = 16
output_size = 3

model = Model(input_size, hidden_size, output_size)

learning_rate = 0.001

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


num_epochs = 100
loss_hist = [0] * num_epochs
accuracy_hist = [0] * num_epochs

for epoch in range(num_epochs):

    for x_batch, y_batch in train_dl:
        pred = model(x_batch)
        loss = loss_fn(pred, y_batch.long())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_hist[epoch] += loss.item()*y_batch.size(0)
        is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
        accuracy_hist[epoch] += is_correct.sum()

    loss_hist[epoch] /= len(train_dl.dataset)
    accuracy_hist[epoch] /= len(train_dl.dataset)


fig = plt.figure(figsize=(12, 5))
ax = fig.add_subplot(1, 2, 1)
ax.plot(loss_hist, lw=3)
ax.set_title('Training loss', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

ax = fig.add_subplot(1, 2, 2)
ax.plot(accuracy_hist, lw=3)
ax.set_title('Training accuracy', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)
plt.tight_layout()

#plt.savefig('figures/12_09.pdf')

plt.show()


# ### Evaluating the trained model on the test dataset


X_test_norm = (X_test - np.mean(X_train)) / np.std(X_train)
X_test_norm = torch.from_numpy(X_test_norm).float()
y_test = torch.from_numpy(y_test)
pred_test = model(X_test_norm)

correct = (torch.argmax(pred_test, dim=1) == y_test).float()
accuracy = correct.mean()

print(f'Test Acc.: {accuracy:.4f}')


# ### Saving and reloading the trained model


path = 'iris_classifier.pt'
torch.save(model, path)


# Note: with the torch version pinned above (1.9.0) this works as-is; on
# PyTorch >= 2.6, loading a whole pickled model requires
# torch.load(path, weights_only=False).
model_new = torch.load(path)
model_new.eval()


pred_test = model_new(X_test_norm)

correct = (torch.argmax(pred_test, dim=1) == y_test).float()
accuracy = correct.mean()

print(f'Test Acc.: {accuracy:.4f}')


path = 'iris_classifier_state.pt'
torch.save(model.state_dict(), path)


model_new = Model(input_size, hidden_size, output_size)
model_new.load_state_dict(torch.load(path))
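
# Sanity check (my addition, not in the book's code): the model restored
# from the state_dict should produce exactly the same predictions as the
# original model.
model_new.eval()
with torch.no_grad():
    assert torch.allclose(model_new(X_test_norm), model(X_test_norm))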

# ## Choosing activation functions for multilayer neural networks
#

# ### Logistic function recap


X = np.array([1, 1.4, 2.5]) ## first value must be 1
w = np.array([0.4, 0.3, 0.5])

def net_input(X, w):
    return np.dot(X, w)

def logistic(z):
    return 1.0 / (1.0 + np.exp(-z))

def logistic_activation(X, w):
    z = net_input(X, w)
    return logistic(z)

print(f'P(y=1|x) = {logistic_activation(X, w):.3f}')


# W : array with shape = (n_output_units, n_hidden_units+1)
# note that the first column contains the bias units

W = np.array([[1.1, 1.2, 0.8, 0.4],
              [0.2, 0.4, 1.0, 0.2],
              [0.6, 1.5, 1.2, 0.7]])

# A : data array with shape = (n_samples, n_hidden_units + 1)
# note that the first element of each row must be 1 (the bias input)

A = np.array([[1, 0.1, 0.4, 0.6]])
Z = np.dot(W, A[0])
y_probas = logistic(Z)
print('Net Input: \n', Z)

print('Output Units:\n', y_probas)


y_class = np.argmax(Z, axis=0)
print('Predicted class label:', y_class)


# ### Estimating class probabilities in multiclass classification via the softmax function


def softmax(z):
    return np.exp(z) / np.sum(np.exp(z))

y_probas = softmax(Z)
print('Probabilities:\n', y_probas)

# The class probabilities sum to 1:
print(np.sum(y_probas))


# The same result via PyTorch:
print(torch.softmax(torch.from_numpy(Z), dim=0))

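
# Aside (my sketch, not part of the book's code): the naive softmax above
# can overflow for large net inputs; subtracting max(z) first is the
# standard numerically stable formulation and yields identical
# probabilities.
def softmax_stable(z):
    e_z = np.exp(z - np.max(z))
    return e_z / e_z.sum()

print('Stable softmax:\n', softmax_stable(Z))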

# ### Broadening the output spectrum using a hyperbolic tangent


def tanh(z):
    e_p = np.exp(z)
    e_m = np.exp(-z)
    return (e_p - e_m) / (e_p + e_m)

z = np.arange(-5, 5, 0.005)
log_act = logistic(z)
tanh_act = tanh(z)
plt.ylim([-1.5, 1.5])
plt.xlabel('Net input $z$')
plt.ylabel(r'Activation $\phi(z)$')
plt.axhline(1, color='black', linestyle=':')
plt.axhline(0.5, color='black', linestyle=':')
plt.axhline(0, color='black', linestyle=':')
plt.axhline(-0.5, color='black', linestyle=':')
plt.axhline(-1, color='black', linestyle=':')
plt.plot(z, tanh_act,
         linewidth=3, linestyle='--',
         label='Tanh')
plt.plot(z, log_act,
         linewidth=3,
         label='Logistic')
plt.legend(loc='lower right')
plt.tight_layout()

#plt.savefig('figures/12_10.pdf')
plt.show()


# NumPy's built-in tanh gives the same values:
print(np.tanh(z))


# ... and so does PyTorch:
print(torch.tanh(torch.from_numpy(z)))


# SciPy's expit is equivalent to the logistic function defined above:
print(expit(z))


# ... as is torch.sigmoid:
print(torch.sigmoid(torch.from_numpy(z)))


# ### Rectified linear unit activation


print(torch.relu(torch.from_numpy(z)))
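
# Aside (my sketch, not part of the book's code): ReLU is simply
# max(0, z), so the NumPy equivalent of the line above is:
print(np.maximum(0, z))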

# Display the ReLU figure from the book's figures folder
# (IPythonImage is imported at the top of this script):
IPythonImage(filename='figures/12_11.png', width=500)


# ## Summary

# ---
#
# Readers may ignore the next cell.