# GitHub Repository: rasbt/machine-learning-book
# Path: blob/main/ch13/ch13_part1.py
# coding: utf-8


import sys
from python_environment_check import check_packages
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from mlxtend.plotting import plot_decision_regions

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:

sys.path.insert(0, '..')

# Check recommended package versions:

d = {
    'numpy': '1.21.2',
    'matplotlib': '3.4.3',
    'torch': '1.8',
    'mlxtend': '0.19.0'
}
check_packages(d)


# # Chapter 13: Going Deeper -- the Mechanics of PyTorch (Part 1/3)

# **Outline**
#
# - [The key features of PyTorch](#The-key-features-of-PyTorch)
# - [PyTorch's computation graphs](#PyTorchs-computation-graphs)
#   - [Understanding computation graphs](#Understanding-computation-graphs)
#   - [Creating a graph in PyTorch](#Creating-a-graph-in-PyTorch)
# - [PyTorch tensor objects for storing and updating model parameters](#PyTorch-tensor-objects-for-storing-and-updating-model-parameters)
# - [Computing gradients via automatic differentiation](#Computing-gradients-via-automatic-differentiation)
#   - [Computing the gradients of the loss with respect to trainable variables](#Computing-the-gradients-of-the-loss-with-respect-to-trainable-variables)
#   - [Understanding automatic differentiation](#Understanding-automatic-differentiation)
#   - [Adversarial examples](#Adversarial-examples)
# - [Simplifying implementations of common architectures via the torch.nn module](#Simplifying-implementations-of-common-architectures-via-the-torch.nn-module)
#   - [Implementing models based on nn.Sequential](#Implementing-models-based-on-nn-Sequential)
#   - [Choosing a loss function](#Choosing-a-loss-function)
#   - [Solving an XOR classification problem](#Solving-an-XOR-classification-problem)
#   - [Making model building more flexible with nn.Module](#Making-model-building-more-flexible-with-nn.Module)
#   - [Writing custom layers in PyTorch](#Writing-custom-layers-in-PyTorch)


# ## The key features of PyTorch
#
# ## PyTorch's computation graphs
#
# ### Understanding computation graphs
#
#

# ### Creating a graph in PyTorch
#
#

def compute_z(a, b, c):
    r1 = torch.sub(a, b)
    r2 = torch.mul(r1, 2)
    z = torch.add(r2, c)
    return z

print('Scalar Inputs:', compute_z(torch.tensor(1), torch.tensor(2), torch.tensor(3)))
print('Rank 1 Inputs:', compute_z(torch.tensor([1]), torch.tensor([2]), torch.tensor([3])))
print('Rank 2 Inputs:', compute_z(torch.tensor([[1]]), torch.tensor([[2]]), torch.tensor([[3]])))


# ## PyTorch Tensor objects for storing and updating model parameters

a = torch.tensor(3.14, requires_grad=True)
b = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(a)
print(b)

a.requires_grad

w = torch.tensor([1.0, 2.0, 3.0])
print(w.requires_grad)

w.requires_grad_()
print(w.requires_grad)

torch.manual_seed(1)
w = torch.empty(2, 3)
nn.init.xavier_normal_(w)
print(w)

class MyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.w1 = torch.empty(2, 3, requires_grad=True)
        nn.init.xavier_normal_(self.w1)
        self.w2 = torch.empty(1, 2, requires_grad=True)
        nn.init.xavier_normal_(self.w2)
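
# A minimal check (an addition, not part of the original script): tensors
# created as plain attributes, as above, are *not* registered with the module.
# Wrapping them in nn.Parameter would make them visible to module utilities
# such as .parameters().
m = MyModule()
print('Registered parameters:', len(list(m.parameters())))  # 0 -- plain tensors are not tracked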

# ## Computing gradients via automatic differentiation
#

# ### Computing the gradients of the loss with respect to trainable variables

w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.5, requires_grad=True)

x = torch.tensor([1.4])
y = torch.tensor([2.1])

z = torch.add(torch.mul(w, x), b)

loss = (y - z).pow(2).sum()
loss.backward()

print('dL/dw : ', w.grad)
print('dL/db : ', b.grad)

# verifying the computed gradient dL/dw
print(2 * x * ((w * x + b) - y))
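
# Two further checks (added here as a sketch, not in the original): the
# analytic gradient dL/db, and the same gradients recomputed with
# torch.autograd.grad, which returns gradients directly instead of
# accumulating them in the .grad attributes.

# verifying the computed gradient dL/db
print(2 * ((w * x + b) - y))

w2 = torch.tensor(1.0, requires_grad=True)
b2 = torch.tensor(0.5, requires_grad=True)
loss2 = (y - (torch.mul(w2, x) + b2)).pow(2).sum()
dw, db = torch.autograd.grad(loss2, [w2, b2])
print('dL/dw :', dw)
print('dL/db :', db)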

# ## Simplifying implementations of common architectures via the torch.nn module
#
#

# ### Implementing models based on nn.Sequential

model = nn.Sequential(
    nn.Linear(4, 16),
    nn.ReLU(),
    nn.Linear(16, 32),
    nn.ReLU()
)

model

# #### Configuring layers
#
# * Initializers `nn.init`: https://pytorch.org/docs/stable/nn.init.html
# * L1 regularization via a manual penalty (cf. `nn.L1Loss`; a usage sketch follows the code below): https://pytorch.org/docs/stable/generated/torch.nn.L1Loss.html#torch.nn.L1Loss
# * L2 regularization via the optimizer's `weight_decay`: https://pytorch.org/docs/stable/optim.html
# * Activations: https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity
#

nn.init.xavier_uniform_(model[0].weight)

l1_weight = 0.01
l1_penalty = l1_weight * model[2].weight.abs().sum()
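
# A short sketch (an assumption -- the original computes l1_penalty but does
# not show its use) of how such a penalty enters training: it is simply added
# to the data loss before calling backward(). x_demo and y_demo are
# hypothetical dummy data, shaped to match the model above.
x_demo = torch.rand(5, 4)
y_demo = torch.rand(5, 32)
out = model(x_demo)
loss_demo = nn.MSELoss()(out, y_demo) + l1_weight * model[2].weight.abs().sum()
loss_demo.backward()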
209
210
211
# #### Compiling a model
212
#
213
# * Optimizers `torch.optim`: https://pytorch.org/docs/stable/optim.html#algorithms
214
# * Loss Functins `tf.keras.losses`: https://pytorch.org/docs/stable/nn.html#loss-functions
215
216
217
218
loss_fn = nn.BCELoss()
219
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
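
# A minimal single-step sketch with dummy data (an assumption, not in the
# original) showing the canonical PyTorch update order: forward pass, loss,
# backward(), step(), then zeroing the gradients. A sigmoid is applied here
# only so the model output is a valid probability for BCELoss.
optimizer.zero_grad()                          # clear gradients left over from the sketch above
x_demo = torch.rand(8, 4)
y_demo = torch.randint(0, 2, (8, 32)).float()  # hypothetical 0/1 targets, shaped to match the output
proba = torch.sigmoid(model(x_demo))
loss = loss_fn(proba, y_demo)
loss.backward()
optimizer.step()
optimizer.zero_grad()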

# ## Solving an XOR classification problem

np.random.seed(1)
torch.manual_seed(1)
x = np.random.uniform(low=-1, high=1, size=(200, 2))
y = np.ones(len(x))
y[x[:, 0] * x[:, 1] < 0] = 0  # class 0 where the two features have opposite signs (XOR-like labels)

n_train = 100
x_train = torch.tensor(x[:n_train, :], dtype=torch.float32)
y_train = torch.tensor(y[:n_train], dtype=torch.float32)
x_valid = torch.tensor(x[n_train:, :], dtype=torch.float32)
y_valid = torch.tensor(y[n_train:], dtype=torch.float32)

fig = plt.figure(figsize=(6, 6))
plt.plot(x[y==0, 0],
         x[y==0, 1], 'o', alpha=0.75, markersize=10)
plt.plot(x[y==1, 0],
         x[y==1, 1], '<', alpha=0.75, markersize=10)
plt.xlabel(r'$x_1$', size=15)
plt.ylabel(r'$x_2$', size=15)

#plt.savefig('figures/13_02.png', dpi=300)
plt.show()


train_ds = TensorDataset(x_train, y_train)
batch_size = 2
torch.manual_seed(1)
train_dl = DataLoader(train_ds, batch_size, shuffle=True)


model = nn.Sequential(
    nn.Linear(2, 1),
    nn.Sigmoid()
)

model


loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)


torch.manual_seed(1)
num_epochs = 200

def train(model, num_epochs, train_dl, x_valid, y_valid):
    loss_hist_train = [0] * num_epochs
    accuracy_hist_train = [0] * num_epochs
    loss_hist_valid = [0] * num_epochs
    accuracy_hist_valid = [0] * num_epochs
    for epoch in range(num_epochs):
        for x_batch, y_batch in train_dl:
            pred = model(x_batch)[:, 0]
            loss = loss_fn(pred, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss_hist_train[epoch] += loss.item()
            is_correct = ((pred >= 0.5).float() == y_batch).float()
            accuracy_hist_train[epoch] += is_correct.mean()

        loss_hist_train[epoch] /= n_train / batch_size
        accuracy_hist_train[epoch] /= n_train / batch_size

        pred = model(x_valid)[:, 0]
        loss = loss_fn(pred, y_valid)
        loss_hist_valid[epoch] = loss.item()
        is_correct = ((pred >= 0.5).float() == y_valid).float()
        accuracy_hist_valid[epoch] = is_correct.mean()
    return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid

history = train(model, num_epochs, train_dl, x_valid, y_valid)


fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 2, 1)
plt.plot(history[0], lw=4)
plt.plot(history[1], lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 2, 2)
plt.plot(history[2], lw=4)
plt.plot(history[3], lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

#plt.savefig('figures/13_03.png', dpi=300)
plt.show()

model = nn.Sequential(
    nn.Linear(2, 4),
    nn.ReLU(),
    nn.Linear(4, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    nn.Sigmoid()
)

loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

model


history = train(model, num_epochs, train_dl, x_valid, y_valid)


fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 2, 1)
plt.plot(history[0], lw=4)
plt.plot(history[1], lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 2, 2)
plt.plot(history[2], lw=4)
plt.plot(history[3], lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

#plt.savefig('figures/13_04.png', dpi=300)
plt.show()


# ## Making model building more flexible with nn.Module
#
#

class MyModule(nn.Module):
    def __init__(self):
        super().__init__()
        l1 = nn.Linear(2, 4)
        a1 = nn.ReLU()
        l2 = nn.Linear(4, 4)
        a2 = nn.ReLU()
        l3 = nn.Linear(4, 1)
        a3 = nn.Sigmoid()
        l = [l1, a1, l2, a2, l3, a3]
        self.module_list = nn.ModuleList(l)

    def forward(self, x):
        for f in self.module_list:
            x = f(x)
        return x

    def predict(self, x):
        x = torch.tensor(x, dtype=torch.float32)
        pred = self.forward(x)[:, 0]
        return (pred >= 0.5).float()

model = MyModule()
model
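
# A quick, untrained sanity check (an addition, not in the original):
# predict() maps raw inputs to hard 0/1 labels via the 0.5 threshold, which
# is the method mlxtend's plot_decision_regions calls on the model below.
print(model.predict(x_valid.numpy())[:5])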


loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

# torch.manual_seed(1)
history = train(model, num_epochs, train_dl, x_valid, y_valid)


# !pip install mlxtend


fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 3, 1)
plt.plot(history[0], lw=4)
plt.plot(history[1], lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 2)
plt.plot(history[2], lw=4)
plt.plot(history[3], lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 3)
plot_decision_regions(X=x_valid.numpy(),
                      y=y_valid.numpy().astype(np.int64),
                      clf=model)
ax.set_xlabel(r'$x_1$', size=15)
ax.xaxis.set_label_coords(1, -0.025)
ax.set_ylabel(r'$x_2$', size=15)
ax.yaxis.set_label_coords(-0.025, 1)
plt.show()

# ## Writing custom layers in PyTorch
#

class NoisyLinear(nn.Module):
    def __init__(self, input_size, output_size, noise_stddev=0.1):
        super().__init__()
        w = torch.Tensor(input_size, output_size)
        self.w = nn.Parameter(w)  # nn.Parameter is a Tensor that's a module parameter.
        nn.init.xavier_uniform_(self.w)
        b = torch.Tensor(output_size).fill_(0)
        self.b = nn.Parameter(b)
        self.noise_stddev = noise_stddev

    def forward(self, x, training=False):
        if training:
            noise = torch.normal(0.0, self.noise_stddev, x.shape)
            x_new = torch.add(x, noise)
        else:
            x_new = x
        return torch.add(torch.mm(x_new, self.w), self.b)


## testing:

torch.manual_seed(1)

noisy_layer = NoisyLinear(4, 2)

x = torch.zeros((1, 4))
print(noisy_layer(x, training=True))

print(noisy_layer(x, training=True))

print(noisy_layer(x, training=False))
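
# An alternative sketch (an assumption, not the book's approach): instead of
# threading a `training` flag through forward(), an nn.Module can branch on
# its built-in self.training attribute, which model.train() / model.eval()
# toggle automatically.
class NoisyLinearAuto(NoisyLinear):
    def forward(self, x):
        if self.training:
            x = x + torch.normal(0.0, self.noise_stddev, x.shape)
        return torch.add(torch.mm(x, self.w), self.b)

layer = NoisyLinearAuto(4, 2)
layer.train()
print(layer(x))  # noisy output
layer.eval()
print(layer(x))  # deterministic output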


class MyNoisyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = NoisyLinear(2, 4, 0.07)
        self.a1 = nn.ReLU()
        self.l2 = nn.Linear(4, 4)
        self.a2 = nn.ReLU()
        self.l3 = nn.Linear(4, 1)
        self.a3 = nn.Sigmoid()

    def forward(self, x, training=False):
        x = self.l1(x, training)
        x = self.a1(x)
        x = self.l2(x)
        x = self.a2(x)
        x = self.l3(x)
        x = self.a3(x)
        return x

    def predict(self, x):
        x = torch.tensor(x, dtype=torch.float32)
        pred = self.forward(x)[:, 0]
        return (pred >= 0.5).float()

torch.manual_seed(1)
model = MyNoisyModule()
model


loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

torch.manual_seed(1)

loss_hist_train = [0] * num_epochs
accuracy_hist_train = [0] * num_epochs
loss_hist_valid = [0] * num_epochs
accuracy_hist_valid = [0] * num_epochs
for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        pred = model(x_batch, True)[:, 0]  # training=True: inject noise during training
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_hist_train[epoch] += loss.item()
        is_correct = ((pred >= 0.5).float() == y_batch).float()
        accuracy_hist_train[epoch] += is_correct.mean()

    loss_hist_train[epoch] /= n_train / batch_size
    accuracy_hist_train[epoch] /= n_train / batch_size

    pred = model(x_valid)[:, 0]  # default training=False: no noise at validation time
    loss = loss_fn(pred, y_valid)
    loss_hist_valid[epoch] = loss.item()
    is_correct = ((pred >= 0.5).float() == y_valid).float()
    accuracy_hist_valid[epoch] = is_correct.mean()


fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 3, 1)
plt.plot(loss_hist_train, lw=4)
plt.plot(loss_hist_valid, lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 2)
plt.plot(accuracy_hist_train, lw=4)
plt.plot(accuracy_hist_valid, lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 3)
plot_decision_regions(X=x_valid.numpy(),
                      y=y_valid.numpy().astype(np.int64),
                      clf=model)
ax.set_xlabel(r'$x_1$', size=15)
ax.xaxis.set_label_coords(1, -0.025)
ax.set_ylabel(r'$x_2$', size=15)
ax.yaxis.set_label_coords(-0.025, 1)
plt.show()


# ---
#
# Readers may ignore the next cell.