GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/13/mlp_cifar_pytorch.ipynb


MLP for image classification using PyTorch

In this section, we follow Chap. 7 of the Deep Learning With PyTorch book and illustrate how to fit an MLP to a two-class version of CIFAR-10. (We modify the code from here.)

import sklearn
import scipy
import scipy.optimize
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import warnings

warnings.filterwarnings("ignore")
import itertools
import time
from functools import partial
import os

import numpy as np
from scipy.special import logsumexp

np.set_printoptions(precision=3)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

print("torch version {}".format(torch.__version__))
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    print("current device {}".format(torch.cuda.current_device()))
else:
    print("Torch cannot find GPU")


def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# torch.backends.cudnn.benchmark = True
torch version 1.8.0+cu101
Tesla V100-SXM2-16GB
current device 0

Get the CIFAR dataset

from torchvision import datasets

folder = "data"
cifar10 = datasets.CIFAR10(folder, train=True, download=True)
cifar10_val = datasets.CIFAR10(folder, train=False, download=True)
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified
print(type(cifar10))
print(type(cifar10).__mro__)  # method resolution order shows the class hierarchy
<class 'torchvision.datasets.cifar.CIFAR10'>
(<class 'torchvision.datasets.cifar.CIFAR10'>, <class 'torchvision.datasets.vision.VisionDataset'>, <class 'torch.utils.data.dataset.Dataset'>, <class 'typing.Generic'>, <class 'object'>)
print(len(cifar10))
img, label = cifar10[99]
print(type(img))
print(img)
plt.imshow(img)
plt.show()
50000
<class 'PIL.Image.Image'>
<PIL.Image.Image image mode=RGB size=32x32 at 0x7F4ACCD47ED0>
[Image: CIFAR-10 training image 99]
class_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
fig = plt.figure(figsize=(8, 3))
num_classes = 10
for i in range(num_classes):
    ax = fig.add_subplot(2, 5, 1 + i, xticks=[], yticks=[])
    ax.set_title(class_names[i])
    img = next(img for img, label in cifar10 if label == i)
    plt.imshow(img)
plt.show()
[Image: one example image per CIFAR-10 class]

Convert to tensors

# Now we want to convert this to a tensor
from torchvision import transforms

to_tensor = transforms.ToTensor()
img, label = cifar10[99]
img_t = to_tensor(img)
print(type(img))
# print(img.shape)
print(type(img_t))
print(img_t.shape)  # channels * height * width, here channels=3 (RGB)
print(img_t.min(), img_t.max())  # pixel values are rescaled to 0..1
<class 'PIL.Image.Image'>
<class 'torch.Tensor'>
torch.Size([3, 32, 32])
tensor(0.) tensor(1.)
# transform the whole dataset to tensors
cifar10 = datasets.CIFAR10(folder, train=True, download=False, transform=transforms.ToTensor())
img, label = cifar10[99]
print(type(img))
plt.imshow(img.permute(1, 2, 0))  # matplotlib expects H*W*C
plt.show()
<class 'torch.Tensor'>
[Image: CIFAR-10 training image 99, plotted from the tensor]

Standardize the inputs

We standardize the features by computing the mean and std of each channel, averaging across all pixels and all images. This will help optimization.

# stack the whole training set into a single tensor of shape 3*H*W*N
imgs = torch.stack([img for img, _ in cifar10], dim=3)
print(imgs.shape)
torch.Size([3, 32, 32, 50000])
imgs_flat = imgs.view(3, -1)  # keep the channel dimension, flatten all the others
print(imgs_flat.shape)
mu = imgs_flat.mean(dim=1)  # average over the second dimension (H*W*N) to get one mean per channel
sigma = imgs_flat.std(dim=1)
print(mu)
print(sigma)
torch.Size([3, 51200000])
tensor([0.4914, 0.4822, 0.4465])
tensor([0.2470, 0.2435, 0.2616])
cifar10 = datasets.CIFAR10(
    folder,
    train=True,
    download=False,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(mu, sigma)]),
)
cifar10_val = datasets.CIFAR10(
    folder,
    train=False,
    download=False,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(mu, sigma)]),
)
# rescaled data is harder to visualize
img, _ = cifar10[99]
plt.imshow(img.permute(1, 2, 0))
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
[Image: normalized (clipped) version of training image 99]
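As a quick sanity check (our own addition, not in the original notebook), we can verify that the normalized training set now has approximately zero mean and unit standard deviation per channel:

imgs_norm = torch.stack([img for img, _ in cifar10], dim=3).view(3, -1)
print(imgs_norm.mean(dim=1))  # close to tensor([0., 0., 0.])
print(imgs_norm.std(dim=1))  # close to tensor([1., 1., 1.])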

Create two-class version of dataset

We extract the examples whose label is airplane or bird. The resulting object is a list of pairs. It "acts like" an object of type torch.utils.data.Dataset, since it implements the __len__() and __getitem__() methods.

class_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
label_map = {0: 0, 2: 1}  # 0 (airplane) -> 0, 2 (bird) -> 1
cifar2 = [(img, label_map[label]) for img, label in cifar10 if label in [0, 2]]
cifar2_val = [(img, label_map[label]) for img, label in cifar10_val if label in [0, 2]]
print(len(cifar2))
print(len(cifar2_val))
10000
2000
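If we wanted an actual Dataset subclass instead of a plain list (for example, to hide the filtering behind a reusable class), a minimal sketch could look like the following; the Cifar2 class is our own illustration, not part of torchvision:

from torch.utils.data import Dataset


class Cifar2(Dataset):
    """Wrap CIFAR-10, keeping only airplane (0) and bird (2)."""

    def __init__(self, cifar10_dataset):
        self.data = [(img, label_map[label]) for img, label in cifar10_dataset if label in [0, 2]]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


# behaves just like the list version above
cifar2_ds = Cifar2(cifar10)
print(len(cifar2_ds))  # 10000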

A shallow, fully connected model

img, label = cifar10[0]
img = img.view(-1)
ninputs = len(img)
nhidden = 512
nclasses = 2
torch.manual_seed(0)
model = nn.Sequential(nn.Linear(ninputs, nhidden), nn.Tanh(), nn.Linear(nhidden, nclasses), nn.LogSoftmax(dim=1))
print(model)
Sequential(
  (0): Linear(in_features=3072, out_features=512, bias=True)
  (1): Tanh()
  (2): Linear(in_features=512, out_features=2, bias=True)
  (3): LogSoftmax(dim=1)
)

We can name the layers so we can access their activations and/or parameters more easily.

torch.manual_seed(0)
from collections import OrderedDict

model = nn.Sequential(
    OrderedDict(
        [
            ("hidden_linear", nn.Linear(ninputs, nhidden)),
            ("activation", nn.Tanh()),
            ("output_linear", nn.Linear(nhidden, nclasses)),
            ("softmax", nn.LogSoftmax(dim=1)),
        ]
    )
)
print(model)
Sequential(
  (hidden_linear): Linear(in_features=3072, out_features=512, bias=True)
  (activation): Tanh()
  (output_linear): Linear(in_features=512, out_features=2, bias=True)
  (softmax): LogSoftmax(dim=1)
)
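Named submodules can then be accessed as attributes, which is convenient for inspecting their parameters:

print(model.hidden_linear.weight.shape)  # torch.Size([512, 3072])
print(model.output_linear.bias.shape)  # torch.Size([2])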

Let's test the model.

img, label = cifar2[0]
img_batch = img.view(-1).unsqueeze(0)
print(img_batch.shape)
logprobs = model(img_batch)
print(logprobs.shape)
print(logprobs)
probs = torch.exp(logprobs)  # elementwise
print(probs)
print(probs.sum(1))
torch.Size([1, 3072])
torch.Size([1, 2])
tensor([[-0.6193, -0.7729]], grad_fn=<LogSoftmaxBackward>)
tensor([[0.5383, 0.4617]], grad_fn=<ExpBackward>)
tensor([1.], grad_fn=<SumBackward1>)

Negative log likelihood loss.

loss_fn = nn.NLLLoss()
loss = loss_fn(logprobs, torch.tensor([label]))
print(loss)
tensor(0.7729, grad_fn=<NllLossBackward>)
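Since NLLLoss just picks out the negative log probability of the true class, we can reproduce the value by hand (a quick check we add here, not in the original notebook):

# NLL loss for one example is just -log p(true class | input)
print(-logprobs[0, label])  # should match the tensor(0.7729, ...) above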

Let's access the output of the logit layer directly, bypassing the final log softmax. (We borrow a trick from here).

activation = {}


def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()

    return hook


model.output_linear.register_forward_hook(get_activation("output_linear"))
logprobs = model(img_batch).detach().numpy()
logits = activation["output_linear"]
logprobs2 = F.log_softmax(logits, dim=1).detach().numpy()
print(logprobs)
print(logprobs2)
assert np.allclose(logprobs, logprobs2)
[[-0.619 -0.773]]
[[-0.619 -0.773]]
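Note that register_forward_hook returns a removable handle; the hook stays attached for all future forward passes, so once we no longer need it we can detach it. A small usage sketch (our addition):

handle = model.output_linear.register_forward_hook(get_activation("output_linear"))
# ... forward passes that need the cached activations go here ...
handle.remove()  # detach the hook so later calls skip it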

We can also modify the model to return logits.

torch.manual_seed(0)
model_logits = nn.Sequential(nn.Linear(ninputs, nhidden), nn.Tanh(), nn.Linear(nhidden, nclasses))
logits2 = model_logits(img_batch)
print(logits)
print(logits2)
torch.testing.assert_allclose(logits, logits2)
tensor([[-0.1771, -0.3307]])
tensor([[-0.1771, -0.3307]], grad_fn=<AddmmBackward>)

In this case, we need to modify the loss to take in logits.

logprobs = model(img_batch)
loss = nn.NLLLoss()(logprobs, torch.tensor([label]))

logits = model_logits(img_batch)
loss2 = nn.CrossEntropyLoss()(logits, torch.tensor([label]))

print(loss)
print(loss2)
torch.testing.assert_allclose(loss, loss2)
tensor(0.7729, grad_fn=<NllLossBackward>)
tensor(0.7729, grad_fn=<NllLossBackward>)
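CrossEntropyLoss combines LogSoftmax and NLLLoss in one step: for a single example with true class y, the loss is -logits[y] + logsumexp(logits). We can check this identity directly (our own sanity check, not in the original notebook):

# cross entropy from raw logits: negative true-class logit plus the log-normalizer
loss3 = -logits[0, label] + torch.logsumexp(logits[0], dim=0)
print(loss3)  # should match tensor(0.7729, ...) above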

We can also use the functional API to specify the model. This avoids having to create stateless layers (i.e., layers with no adjustable parameters), such as the tanh or softmax layers.

class MLP(nn.Module):
    def __init__(self, ninputs, nhidden, nclasses):
        super().__init__()
        self.fc1 = nn.Linear(ninputs, nhidden)
        self.fc2 = nn.Linear(nhidden, nclasses)

    def forward(self, x):
        out = torch.tanh(self.fc1(x))
        out = self.fc2(out)
        return out  # logits


torch.manual_seed(0)
model = MLP(ninputs, nhidden, nclasses)
logits = model(img_batch)
logits2 = model_logits(img_batch)
print(logits)
print(logits2)
torch.testing.assert_allclose(logits, logits2)

# print(list(model.named_parameters()))
nparams = [p.numel() for p in model.parameters() if p.requires_grad]
print(nparams)  # weights1, bias1, weights2, bias2
print([ninputs * nhidden, nhidden, nhidden * nclasses, nclasses])
tensor([[-0.1771, -0.3307]], grad_fn=<AddmmBackward>)
tensor([[-0.1771, -0.3307]], grad_fn=<AddmmBackward>)
[1572864, 512, 1024, 2]
[1572864, 512, 1024, 2]
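Summing these per-tensor counts gives the total number of trainable parameters:

total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(total_params)  # 1572864 + 512 + 1024 + 2 = 1574402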

Evaluation before training

def compute_accuracy(model, loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in loader:
            outputs = model(imgs.view(imgs.shape[0], -1))
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())
    return correct / total


train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)

torch.manual_seed(0)
model = MLP(ninputs, nhidden, nclasses)
acc_train = compute_accuracy(model, train_loader)
acc_val = compute_accuracy(model, val_loader)
print([acc_train, acc_val])
[0.4711, 0.4775]
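As expected for an untrained model on this balanced two-class problem, both accuracies are close to chance level (0.5).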

Training loop

torch.manual_seed(0)
model = MLP(ninputs, nhidden, nclasses)
learning_rate = 1e-2
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
n_epochs = 20

for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # At end of each epoch
    acc_val = compute_accuracy(model, val_loader)
    loss_train_batch = float(loss)
    print(f"Epoch {epoch}, Batch Loss {loss_train_batch}, Val acc {acc_val}")
Epoch 0, Batch Loss 0.7986931204795837, Val acc 0.758
Epoch 1, Batch Loss 0.7316719889640808, Val acc 0.781
Epoch 2, Batch Loss 0.6822570562362671, Val acc 0.7895
Epoch 3, Batch Loss 0.6403710842132568, Val acc 0.7935
Epoch 4, Batch Loss 0.603853702545166, Val acc 0.796
Epoch 5, Batch Loss 0.5708653926849365, Val acc 0.8035
Epoch 6, Batch Loss 0.5402626991271973, Val acc 0.8075
Epoch 7, Batch Loss 0.5115160942077637, Val acc 0.808
Epoch 8, Batch Loss 0.48429471254348755, Val acc 0.8115
Epoch 9, Batch Loss 0.45831188559532166, Val acc 0.8095
Epoch 10, Batch Loss 0.4333283007144928, Val acc 0.808
Epoch 11, Batch Loss 0.40905866026878357, Val acc 0.808
Epoch 12, Batch Loss 0.3851880431175232, Val acc 0.811
Epoch 13, Batch Loss 0.3614955246448517, Val acc 0.811
Epoch 14, Batch Loss 0.33788853883743286, Val acc 0.8105
Epoch 15, Batch Loss 0.31435394287109375, Val acc 0.8115
Epoch 16, Batch Loss 0.29094964265823364, Val acc 0.813
Epoch 17, Batch Loss 0.267885684967041, Val acc 0.812
Epoch 18, Batch Loss 0.2455700933933258, Val acc 0.814
Epoch 19, Batch Loss 0.22448962926864624, Val acc 0.8135
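After training, we can re-evaluate both splits with the same helper (a small addition to the original notebook); the gap between training and validation accuracy gives a rough sense of overfitting:

acc_train = compute_accuracy(model, train_loader)
acc_val = compute_accuracy(model, val_loader)
print([acc_train, acc_val])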