GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/14/conv2d_torch.ipynb

Foundations of convolutional neural networks

Based on sec. 6.2 of the Dive into Deep Learning book: http://d2l.ai/chapter_convolutional-neural-networks/conv-layer.html

import numpy as np
import matplotlib.pyplot as plt

np.random.seed(seed=1)
import math

try:
    import torch
except ModuleNotFoundError:
    %pip install -qq torch
    import torch
from torch import nn
from torch.nn import functional as F

!mkdir figures  # for saving plots

import warnings

warnings.filterwarnings("ignore")

# For reproducibility on different runs
torch.backends.cudnn.deterministic = True
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

Cross correlation
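The corr2d function below slides an $h \times w$ kernel $K$ over the image $X$ (no padding, stride 1) and computes

$Y[i, j] = \sum_{a=0}^{h-1} \sum_{b=0}^{w-1} X[i + a, j + b] \, K[a, b],$

which is the cross-correlation that deep learning libraries implement under the name "convolution" (no kernel flipping).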

# Cross correlation
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i : i + h, j : j + w] * K).sum()
    return Y


X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
print(corr2d(X, K))
tensor([[19., 25.],
        [37., 43.]])
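As a quick cross-check (not part of the original notebook), the built-in F.conv2d should reproduce corr2d once X and K carry the batch and channel dimensions it expects, since framework "convolutions" are in fact cross-correlations:

# Sanity check: F.conv2d performs cross-correlation, so it matches corr2d
# once X and K are given (batch, channel) dimensions.
out = F.conv2d(X.reshape(1, 1, 3, 3), K.reshape(1, 1, 2, 2))
assert torch.allclose(out.reshape(2, 2), corr2d(X, K))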

Edge detection

We make a small image X of 1s, with a vertical stripe (of width 4) of 0s in the middle.

X = torch.ones((6, 8))
X[:, 2:6] = 0
X
tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

Now we apply a vertical edge detector. It fires on the 1-0 and 0-1 boundaries.

K = torch.tensor([[1.0, -1.0]])
Y = corr2d(X, K)
print(Y)
tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

Applied to the transposed image, whose edges are now horizontal, the same kernel detects nothing.

corr2d(X.t(), K)
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
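Transposing the kernel instead gives a horizontal edge detector, so it does fire on the transposed image (an extra check, not in the original notebook):

# K.t() has shape (2, 1) and detects horizontal edges, which is exactly
# what the transposed image contains.
print(corr2d(X.t(), K.t()))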

Convolution as matrix multiplication

# K = torch.tensor([[0, 1], [2, 3]])
K = torch.tensor([[1, 2], [3, 4]])
print(K)


def kernel2matrix(K):
    k, W = torch.zeros(5), torch.zeros((4, 9))
    k[:2], k[3:5] = K[0, :], K[1, :]
    W[0, :5], W[1, 1:6], W[2, 3:8], W[3, 4:] = k, k, k, k
    return W


W = kernel2matrix(K)
print(W)
tensor([[1, 2],
        [3, 4]])
tensor([[1., 2., 0., 3., 4., 0., 0., 0., 0.],
        [0., 1., 2., 0., 3., 4., 0., 0., 0.],
        [0., 0., 0., 1., 2., 0., 3., 4., 0.],
        [0., 0., 0., 0., 1., 2., 0., 3., 4.]])
X = torch.arange(9.0).reshape(3, 3)
Y = corr2d(X, K)
print(Y)
Y2 = torch.mv(W, X.reshape(-1)).reshape(2, 2)
assert np.allclose(Y, Y2)
tensor([[27., 37.],
        [57., 67.]])
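The same identity can also be reached through im2col (a sketch, not in the original notebook): F.unfold lays out every 2x2 patch of X as a column, so the whole cross-correlation collapses into a single matrix product with the flattened kernel.

# im2col view: each column of `patches` is one flattened 2x2 patch of X,
# so multiplying by the flattened kernel reproduces corr2d(X, K).
patches = F.unfold(X.reshape(1, 1, 3, 3), kernel_size=2)  # shape (1, 4, 4)
Y3 = (K.reshape(1, 4).float() @ patches.squeeze(0)).reshape(2, 2)
assert np.allclose(Y, Y3)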

Optimizing the kernel parameters

Let's learn a kernel to match the output of our manual edge detector.

# Construct a two-dimensional convolutional layer with 1 output channel and a
# kernel of shape (1, 2). For the sake of simplicity, we ignore the bias here
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# The two-dimensional convolutional layer uses four-dimensional input and
# output in the format of (example, channel, height, width), where the batch
# size (number of examples in the batch) and the number of channels are both 1

# Defining X and Y again.
X = torch.ones((6, 8))
X[:, 2:6] = 0
K = torch.tensor([[1.0, -1.0]])
Y = corr2d(X, K)

X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

for i in range(10):
    Y_hat = conv2d(X)
    l = (Y_hat - Y) ** 2
    conv2d.zero_grad()
    l.sum().backward()
    # Update the kernel
    conv2d.weight.data[:] -= 3e-2 * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f"batch {i + 1}, loss {l.sum():.3f}")

print(conv2d.weight.data.reshape((1, 2)))
batch 2, loss 12.626
batch 4, loss 2.939
batch 6, loss 0.829
batch 8, loss 0.277
batch 10, loss 0.103
tensor([[ 1.0161, -0.9523]])
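The manual weight update above is just plain SGD, so we can let torch.optim do the same work (a sketch; conv2d_sgd is our own name for a fresh layer, and lr=3e-2 matches the step size above):

# Same optimization via torch.optim.SGD (equivalent to the manual update).
conv2d_sgd = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)
optimizer = torch.optim.SGD(conv2d_sgd.parameters(), lr=3e-2)
for i in range(10):
    optimizer.zero_grad()
    loss = ((conv2d_sgd(X) - Y) ** 2).sum()
    loss.backward()
    optimizer.step()
print(conv2d_sgd.weight.data.reshape((1, 2)))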

Multiple input channels

# Re-define corr2d (same implementation as above).
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = torch.sum(X[i : i + h, j : j + w] * K)
    return Y
def corr2d_multi_in(X, K):
    # First, iterate through the 0th dimension (channel dimension) of `X` and
    # `K`. Then, add them together
    return sum(corr2d(x, k) for x, k in zip(X, K))


X = torch.tensor(
    [[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]
)
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

print(X.shape)  # 2 channels, each 3x3
print(K.shape)  # 2 sets of 2x2 filters
out = corr2d_multi_in(X, K)
print(out.shape)
print(out)
torch.Size([2, 3, 3])
torch.Size([2, 2, 2])
torch.Size([2, 2])
tensor([[ 56.,  72.],
        [104., 120.]])
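As a cross-check (not in the original notebook), F.conv2d gives the same 2x2 output once X gets a batch dimension and K gets an output-channel dimension:

# F.conv2d with one output channel: input (1, 2, 3, 3), weight (1, 2, 2, 2).
out2 = F.conv2d(X.unsqueeze(0), K.unsqueeze(0)).squeeze()
assert torch.allclose(out, out2)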

Multiple output channels

def corr2d_multi_in_out(X, K):
    # Iterate through the 0th dimension of `K`, and each time, perform
    # cross-correlation operations with input `X`. All of the results are
    # stacked together
    return torch.stack([corr2d_multi_in(X, k) for k in K], 0)


K = torch.stack((K, K + 1, K + 2), 0)
print(K.shape)
out = corr2d_multi_in_out(X, K)
print(out.shape)
torch.Size([3, 2, 2, 2])
torch.Size([3, 2, 2])
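The stacked kernel already has the (out_channels, in_channels, kH, kW) layout that nn.Conv2d uses for its weight, so copying it into a layer reproduces the result (a sketch, assuming bias=False and the default stride 1 / no padding; `layer` is our own name, not part of the original notebook):

# Copy the hand-built kernel into an nn.Conv2d layer and compare.
layer = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=2, bias=False)
with torch.no_grad():
    layer.weight.copy_(K)
assert torch.allclose(layer(X.unsqueeze(0)).squeeze(0), out)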

1x1 convolution

# A 1x1 conv is the same as multiplying each feature column at each pixel
# by a fully connected matrix
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = torch.matmul(K, X)  # Matrix multiplication in the fully-connected layer
    return Y.reshape((c_o, h, w))


X = torch.normal(0, 1, (3, 3, 3))  # 3 channels per pixel
K = torch.normal(0, 1, (2, 3, 1, 1))  # map from 3 channels to 2
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
print(Y2.shape)
assert float(torch.abs(Y1 - Y2).sum()) < 1e-6
torch.Size([2, 3, 3])
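Equivalently (a sketch, not in the original notebook), the 1x1 convolution is a per-pixel linear layer over the channel dimension: move channels last, apply an nn.Linear mapping 3 channels to 2 with the same weights, and move channels back.

# 1x1 convolution as a per-pixel nn.Linear over the channel dimension.
linear = nn.Linear(3, 2, bias=False)
with torch.no_grad():
    linear.weight.copy_(K.reshape(2, 3))
Y3 = linear(X.permute(1, 2, 0)).permute(2, 0, 1)
assert torch.allclose(Y1, Y3)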

Pooling

def pool2d(X, pool_size, mode="max"):
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == "max":
                Y[i, j] = X[i : i + p_h, j : j + p_w].max()
            elif mode == "avg":
                Y[i, j] = X[i : i + p_h, j : j + p_w].mean()
    return Y
# X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
X = torch.arange(16, dtype=torch.float32).reshape((4, 4))
print(X)
print(X.shape)
print(pool2d(X, (3, 3), "max"))
tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.]])
torch.Size([4, 4])
tensor([[10., 11.],
        [14., 15.]])
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
# Note: this reuses the name pool2d, shadowing the function defined above.
pool2d = nn.MaxPool2d(3, padding=0, stride=1)
print(pool2d(X))
tensor([[[[10., 11.],
          [14., 15.]]]])
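For comparison (an extra example, not in the original notebook), average pooling over the same 3x3 window with nn.AvgPool2d mirrors the "avg" branch of the manual pool2d function above:

# Average pooling with a 3x3 window and stride 1 (avg_pool is our own name).
avg_pool = nn.AvgPool2d(3, padding=0, stride=1)
out_avg = avg_pool(X)
# Each output entry equals the mean of the corresponding 3x3 window.
assert torch.allclose(out_avg[0, 0, 0, 0], X[0, 0, :3, :3].mean())
print(out_avg)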