GitHub Repository: rasbt/machine-learning-book
Path: blob/main/ch13/ch13_part1.ipynb
Kernel: Python 3 (ipykernel)

Machine Learning with PyTorch and Scikit-Learn

-- Code Examples

Package version checks

Add the parent folder to the path in order to load the check_packages.py script:

import sys
sys.path.insert(0, '..')

Check recommended package versions:

from python_environment_check import check_packages

d = {
    'numpy': '1.21.2',
    'matplotlib': '3.4.3',
    'torch': '1.8',
    'mlxtend': '0.19.0',
}
check_packages(d)
[OK] Your Python version is 3.9.7 | packaged by conda-forge | (default, Sep 29 2021, 19:24:02) [Clang 11.1.0 ]
[OK] numpy 1.23.1
[OK] matplotlib 3.5.1
[OK] torch 1.12.0
[OK] mlxtend 0.19.0

Chapter 13: Going Deeper -- the Mechanics of PyTorch (Part 1/3)

from IPython.display import Image
%matplotlib inline

The key features of PyTorch

PyTorch's computation graphs

Understanding computation graphs

Image(filename='figures/13_01.png', width=400)
[Figure: figures/13_01.png — computation graph illustration]

Creating a graph in PyTorch

import torch
def compute_z(a, b, c):
    r1 = torch.sub(a, b)
    r2 = torch.mul(r1, 2)
    z = torch.add(r2, c)
    return z

print('Scalar Inputs:', compute_z(torch.tensor(1), torch.tensor(2), torch.tensor(3)))
print('Rank 1 Inputs:', compute_z(torch.tensor([1]), torch.tensor([2]), torch.tensor([3])))
print('Rank 2 Inputs:', compute_z(torch.tensor([[1]]), torch.tensor([[2]]), torch.tensor([[3]])))
Scalar Inputs: tensor(1)
Rank 1 Inputs: tensor([1])
Rank 2 Inputs: tensor([[1]])

PyTorch Tensor objects for storing and updating model parameters

a = torch.tensor(3.14, requires_grad=True)
b = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(a)
print(b)
tensor(3.1400, requires_grad=True)
tensor([1., 2., 3.], requires_grad=True)
a.requires_grad
True
w = torch.tensor([1.0, 2.0, 3.0])
print(w.requires_grad)
False
w.requires_grad_()
print(w.requires_grad)
True
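
For completeness (a small aside, not from the notebook): requires_grad_() also accepts a boolean, so gradient tracking can be switched off again in place:

w.requires_grad_(False)
print(w.requires_grad)  # False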
import torch.nn as nn

torch.manual_seed(1)
w = torch.empty(2, 3)
nn.init.xavier_normal_(w)
print(w)
tensor([[ 0.4183,  0.1688,  0.0390],
        [ 0.3930, -0.2858, -0.1051]])
class MyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.w1 = torch.empty(2, 3, requires_grad=True)
        nn.init.xavier_normal_(self.w1)
        self.w2 = torch.empty(1, 2, requires_grad=True)
        nn.init.xavier_normal_(self.w2)
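
A caveat worth flagging (an aside, not from the book's text): tensors assigned as plain attributes, as in MyModule above, are not registered with the module, so model.parameters() will not return w1 and w2, and an optimizer built from it would silently ignore them. Wrapping the tensors in nn.Parameter registers them; a sketch using the hypothetical name MyModuleParam:

class MyModuleParam(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Parameter registers the tensor, so it shows up in .parameters()
        self.w1 = nn.Parameter(torch.empty(2, 3))
        nn.init.xavier_normal_(self.w1)
        self.w2 = nn.Parameter(torch.empty(1, 2))
        nn.init.xavier_normal_(self.w2)

print(len(list(MyModule().parameters())))       # 0 -- plain tensors are invisible
print(len(list(MyModuleParam().parameters())))  # 2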

Computing gradients via automatic differentiation

Computing the gradients of the loss with respect to trainable variables

w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.5, requires_grad=True)
x = torch.tensor([1.4])
y = torch.tensor([2.1])

z = torch.add(torch.mul(w, x), b)
loss = (y - z).pow(2).sum()
loss.backward()

print('dL/dw : ', w.grad)
print('dL/db : ', b.grad)
dL/dw :  tensor(-0.5600)
dL/db :  tensor(-0.4000)
# verifying the computed gradient dL/dw
print(2 * x * ((w * x + b) - y))
tensor([-0.5600], grad_fn=<MulBackward0>)
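
The dL/db gradient can be checked the same way (a quick addition mirroring the cell above): since loss = (y - z)^2 and z = wx + b, we have dL/db = 2(z - y) = 2(1.9 - 2.1) = -0.4, matching the value printed earlier:

# verifying the computed gradient dL/db
print(2 * ((w * x + b) - y))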

Simplifying implementations of common architectures via the torch.nn module

Implementing models based on nn.Sequential

model = nn.Sequential(
    nn.Linear(4, 16),
    nn.ReLU(),
    nn.Linear(16, 32),
    nn.ReLU()
)
model
Sequential(
  (0): Linear(in_features=4, out_features=16, bias=True)
  (1): ReLU()
  (2): Linear(in_features=16, out_features=32, bias=True)
  (3): ReLU()
)
nn.init.xavier_uniform_(model[0].weight)
l1_weight = 0.01
l1_penalty = l1_weight * model[2].weight.abs().sum()
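
The cell above only computes the penalty term. To show how it would enter training, here is a minimal sketch (not part of the notebook; the MSE loss and random data are placeholders): the penalty is simply added to the data loss before backward(), so its gradient flows into the weight update.

# sketch: adding the L1 penalty to the training loss (dummy data, illustrative only)
loss_fn_mse = nn.MSELoss()              # placeholder loss for illustration
x_batch = torch.rand(8, 4)              # dummy inputs
y_batch = torch.rand(8, 32)             # dummy targets matching the 32-unit output
loss = loss_fn_mse(model(x_batch), y_batch) + l1_penalty
loss.backward()                          # d(loss)/dW now includes the L1 term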

Configuring the loss function and optimizer

loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
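
For orientation (not part of the notebook), here is how these two objects interact in a single optimization step; the tiny sigmoid model and random data below are placeholders so that BCELoss applies directly:

# one optimization step with a placeholder model and dummy data
toy_model = nn.Sequential(nn.Linear(4, 1), nn.Sigmoid())
toy_loss_fn = nn.BCELoss()
toy_optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.001)

x = torch.rand(8, 4)                     # dummy inputs
y = torch.randint(0, 2, (8, 1)).float()  # dummy 0/1 targets

pred = toy_model(x)          # forward pass -> probabilities in (0, 1)
loss = toy_loss_fn(pred, y)  # scalar loss
loss.backward()              # populate parameter .grad fields
toy_optimizer.step()         # apply the SGD update
toy_optimizer.zero_grad()    # clear gradients for the next step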

Solving an XOR classification problem

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

np.random.seed(1)
torch.manual_seed(1)
x = np.random.uniform(low=-1, high=1, size=(200, 2))
y = np.ones(len(x))
y[x[:, 0] * x[:, 1] < 0] = 0

n_train = 100
x_train = torch.tensor(x[:n_train, :], dtype=torch.float32)
y_train = torch.tensor(y[:n_train], dtype=torch.float32)
x_valid = torch.tensor(x[n_train:, :], dtype=torch.float32)
y_valid = torch.tensor(y[n_train:], dtype=torch.float32)

fig = plt.figure(figsize=(6, 6))
plt.plot(x[y==0, 0], x[y==0, 1], 'o', alpha=0.75, markersize=10)
plt.plot(x[y==1, 0], x[y==1, 1], '<', alpha=0.75, markersize=10)
plt.xlabel(r'$x_1$', size=15)
plt.ylabel(r'$x_2$', size=15)
#plt.savefig('figures/13_02.png', dpi=300)
plt.show()
[Plot: scatter of the XOR dataset, x_1 vs. x_2 (cf. figures/13_02.png)]
from torch.utils.data import DataLoader, TensorDataset

train_ds = TensorDataset(x_train, y_train)
batch_size = 2
torch.manual_seed(1)
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
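
As a quick sanity check (illustrative, not in the original), one can peek at the first mini-batch to confirm the shapes the training loop will see:

x_batch, y_batch = next(iter(train_dl))
print(x_batch.shape, y_batch.shape)  # expected: torch.Size([2, 2]) torch.Size([2])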
model = nn.Sequential(
    nn.Linear(2, 1),
    nn.Sigmoid()
)
model
Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
  (1): Sigmoid()
)
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
torch.manual_seed(1)
num_epochs = 200

def train(model, num_epochs, train_dl, x_valid, y_valid):
    loss_hist_train = [0] * num_epochs
    accuracy_hist_train = [0] * num_epochs
    loss_hist_valid = [0] * num_epochs
    accuracy_hist_valid = [0] * num_epochs
    for epoch in range(num_epochs):
        for x_batch, y_batch in train_dl:
            pred = model(x_batch)[:, 0]
            loss = loss_fn(pred, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss_hist_train[epoch] += loss.item()
            is_correct = ((pred >= 0.5).float() == y_batch).float()
            accuracy_hist_train[epoch] += is_correct.mean()
        loss_hist_train[epoch] /= n_train / batch_size
        accuracy_hist_train[epoch] /= n_train / batch_size

        pred = model(x_valid)[:, 0]
        loss = loss_fn(pred, y_valid)
        loss_hist_valid[epoch] = loss.item()
        is_correct = ((pred >= 0.5).float() == y_valid).float()
        accuracy_hist_valid[epoch] += is_correct.mean()
    return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid

history = train(model, num_epochs, train_dl, x_valid, y_valid)
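
One refinement worth noting (not in the book's code): the validation pass inside train() still records an autograd graph. Wrapping it in torch.no_grad() skips that bookkeeping and saves memory, e.g.:

# evaluate without gradient tracking (a common pattern)
with torch.no_grad():
    pred = model(x_valid)[:, 0]
    print(loss_fn(pred, y_valid).item())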
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 2, 1)
plt.plot(history[0], lw=4)
plt.plot(history[1], lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 2, 2)
plt.plot(history[2], lw=4)
plt.plot(history[3], lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)
#plt.savefig('figures/13_03.png', dpi=300)
Text(0.5, 0, 'Epochs')
[Plot: training/validation loss and accuracy over epochs (cf. figures/13_03.png)]
model = nn.Sequential(
    nn.Linear(2, 4),
    nn.ReLU(),
    nn.Linear(4, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    nn.Sigmoid()
)
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)
model
Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=4, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4, out_features=1, bias=True)
  (5): Sigmoid()
)
history = train(model, num_epochs, train_dl, x_valid, y_valid)
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 2, 1)
plt.plot(history[0], lw=4)
plt.plot(history[1], lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 2, 2)
plt.plot(history[2], lw=4)
plt.plot(history[3], lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)
#plt.savefig('figures/13_04.png', dpi=300)
Text(0.5, 0, 'Epochs')
[Plot: training/validation loss and accuracy over epochs (cf. figures/13_04.png)]

Making model building more flexible with nn.Module

class MyModule(nn.Module):
    def __init__(self):
        super().__init__()
        l1 = nn.Linear(2, 4)
        a1 = nn.ReLU()
        l2 = nn.Linear(4, 4)
        a2 = nn.ReLU()
        l3 = nn.Linear(4, 1)
        a3 = nn.Sigmoid()
        l = [l1, a1, l2, a2, l3, a3]
        self.module_list = nn.ModuleList(l)

    def forward(self, x):
        for f in self.module_list:
            x = f(x)
        return x

    def predict(self, x):
        x = torch.tensor(x, dtype=torch.float32)
        pred = self.forward(x)[:, 0]
        return (pred >= 0.5).float()

model = MyModule()
model
MyModule(
  (module_list): ModuleList(
    (0): Linear(in_features=2, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=4, bias=True)
    (3): ReLU()
    (4): Linear(in_features=4, out_features=1, bias=True)
    (5): Sigmoid()
  )
)
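
Why nn.ModuleList rather than a plain Python list? Only registered submodules expose their parameters through model.parameters(); a quick check (not in the original notebook):

# three Linear layers, each with a weight and a bias -> 6 parameter tensors
print(len(list(model.parameters())))  # expected: 6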
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

# torch.manual_seed(1)
history = train(model, num_epochs, train_dl, x_valid, y_valid)
# !pip install mlxtend
from mlxtend.plotting import plot_decision_regions

fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 3, 1)
plt.plot(history[0], lw=4)
plt.plot(history[1], lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 2)
plt.plot(history[2], lw=4)
plt.plot(history[3], lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 3)
plot_decision_regions(X=x_valid.numpy(),
                      y=y_valid.numpy().astype(np.int64),
                      clf=model)
ax.set_xlabel(r'$x_1$', size=15)
ax.xaxis.set_label_coords(1, -0.025)
ax.set_ylabel(r'$x_2$', size=15)
ax.yaxis.set_label_coords(-0.025, 1)
#plt.savefig('figures/13_05.png', dpi=300)
plt.show()
[Plot: loss, accuracy, and validation-set decision regions (cf. figures/13_05.png)]

Writing custom layers in PyTorch

class NoisyLinear(nn.Module):
    def __init__(self, input_size, output_size, noise_stddev=0.1):
        super().__init__()
        w = torch.Tensor(input_size, output_size)
        self.w = nn.Parameter(w)  # nn.Parameter is a Tensor that's a module parameter.
        nn.init.xavier_uniform_(self.w)
        b = torch.Tensor(output_size).fill_(0)
        self.b = nn.Parameter(b)
        self.noise_stddev = noise_stddev

    def forward(self, x, training=False):
        if training:
            noise = torch.normal(0.0, self.noise_stddev, x.shape)
            x_new = torch.add(x, noise)
        else:
            x_new = x
        return torch.add(torch.mm(x_new, self.w), self.b)
## testing:
torch.manual_seed(1)
noisy_layer = NoisyLinear(4, 2)
x = torch.zeros((1, 4))
print(noisy_layer(x, training=True))
print(noisy_layer(x, training=True))
print(noisy_layer(x, training=False))
tensor([[ 0.1154, -0.0598]], grad_fn=<AddBackward0>)
tensor([[ 0.0432, -0.0375]], grad_fn=<AddBackward0>)
tensor([[0., 0.]], grad_fn=<AddBackward0>)
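
A design note (an alternative sketch, not the book's code): instead of threading an explicit training flag through forward(), one could rely on the self.training attribute that every nn.Module carries and toggle it via model.train() / model.eval():

class NoisyLinearAlt(nn.Module):  # hypothetical variant of NoisyLinear
    def __init__(self, input_size, output_size, noise_stddev=0.1):
        super().__init__()
        self.w = nn.Parameter(torch.empty(input_size, output_size))
        nn.init.xavier_uniform_(self.w)
        self.b = nn.Parameter(torch.zeros(output_size))
        self.noise_stddev = noise_stddev

    def forward(self, x):
        if self.training:  # set by .train() / .eval()
            x = x + torch.normal(0.0, self.noise_stddev, x.shape)
        return x @ self.w + self.b

layer = NoisyLinearAlt(4, 2)
layer.train()   # enables the noise
layer.eval()    # disables it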
class MyNoisyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = NoisyLinear(2, 4, 0.07)
        self.a1 = nn.ReLU()
        self.l2 = nn.Linear(4, 4)
        self.a2 = nn.ReLU()
        self.l3 = nn.Linear(4, 1)
        self.a3 = nn.Sigmoid()

    def forward(self, x, training=False):
        x = self.l1(x, training)
        x = self.a1(x)
        x = self.l2(x)
        x = self.a2(x)
        x = self.l3(x)
        x = self.a3(x)
        return x

    def predict(self, x):
        x = torch.tensor(x, dtype=torch.float32)
        pred = self.forward(x)[:, 0]
        return (pred >= 0.5).float()

torch.manual_seed(1)
model = MyNoisyModule()
model
MyNoisyModule(
  (l1): NoisyLinear()
  (a1): ReLU()
  (l2): Linear(in_features=4, out_features=4, bias=True)
  (a2): ReLU()
  (l3): Linear(in_features=4, out_features=1, bias=True)
  (a3): Sigmoid()
)
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

torch.manual_seed(1)
loss_hist_train = [0] * num_epochs
accuracy_hist_train = [0] * num_epochs
loss_hist_valid = [0] * num_epochs
accuracy_hist_valid = [0] * num_epochs

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        pred = model(x_batch, True)[:, 0]
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_hist_train[epoch] += loss.item()
        is_correct = ((pred >= 0.5).float() == y_batch).float()
        accuracy_hist_train[epoch] += is_correct.mean()
    loss_hist_train[epoch] /= n_train / batch_size
    accuracy_hist_train[epoch] /= n_train / batch_size

    pred = model(x_valid)[:, 0]
    loss = loss_fn(pred, y_valid)
    loss_hist_valid[epoch] = loss.item()
    is_correct = ((pred >= 0.5).float() == y_valid).float()
    accuracy_hist_valid[epoch] += is_correct.mean()
from mlxtend.plotting import plot_decision_regions

fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 3, 1)
plt.plot(loss_hist_train, lw=4)
plt.plot(loss_hist_valid, lw=4)
plt.legend(['Train loss', 'Validation loss'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 2)
plt.plot(accuracy_hist_train, lw=4)
plt.plot(accuracy_hist_valid, lw=4)
plt.legend(['Train acc.', 'Validation acc.'], fontsize=15)
ax.set_xlabel('Epochs', size=15)

ax = fig.add_subplot(1, 3, 3)
plot_decision_regions(X=x_valid.numpy(),
                      y=y_valid.numpy().astype(np.int64),
                      clf=model)
ax.set_xlabel(r'$x_1$', size=15)
ax.xaxis.set_label_coords(1, -0.025)
ax.set_ylabel(r'$x_2$', size=15)
ax.yaxis.set_label_coords(-0.025, 1)
#plt.savefig('figures/13_06.png', dpi=300)
plt.show()
[Plot: loss, accuracy, and validation-set decision regions (cf. figures/13_06.png)]

Readers may ignore the next cell.

! python ../.convert_notebook_to_script.py --input ch13_part1.ipynb --output ch13_part1.py
[NbConvertApp] Converting notebook ch13_part1.ipynb to script
[NbConvertApp] Writing 14084 bytes to ch13_part1.py