GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/14/conv2d_torch.ipynb

Foundations of convolutional neural networks

Based on sec. 6.2 of the Dive into Deep Learning book: http://d2l.ai/chapter_convolutional-neural-networks/conv-layer.html

import numpy as np
import matplotlib.pyplot as plt

np.random.seed(seed=1)
import math

try:
    import torch
except ModuleNotFoundError:
    %pip install -qq torch
    import torch
from torch import nn
from torch.nn import functional as F

!mkdir figures  # for saving plots

import warnings

warnings.filterwarnings("ignore")

# For reproducibility on different runs
torch.backends.cudnn.deterministic = True
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

Cross correlation
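The corr2d function below slides an $h \times w$ kernel $K$ over the image $X$ (no padding, stride 1) and computes

$Y[i, j] = \sum_{a=0}^{h-1} \sum_{b=0}^{w-1} X[i + a, j + b] \, K[a, b],$

which is the cross-correlation that deep learning libraries implement under the name "convolution" (no kernel flipping).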

# Cross correlation
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i : i + h, j : j + w] * K).sum()
    return Y


X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
print(corr2d(X, K))
tensor([[19., 25.],
        [37., 43.]])
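As a quick cross-check (not part of the original notebook), the built-in F.conv2d should reproduce corr2d once X and K carry the batch and channel dimensions it expects, since framework "convolutions" are in fact cross-correlations:

# Sanity check: F.conv2d performs cross-correlation, so it matches corr2d
# once X and K are given (batch, channel) dimensions.
out = F.conv2d(X.reshape(1, 1, 3, 3), K.reshape(1, 1, 2, 2))
assert torch.allclose(out.reshape(2, 2), corr2d(X, K))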

Edge detection

We make a small image X of 1s, with a vertical stripe (of width 4) of 0s in the middle.

X = torch.ones((6, 8))
X[:, 2:6] = 0
X
tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

Now we apply a vertical edge detector. It fires on the 1-0 and 0-1 boundaries.

K = torch.tensor([[1.0, -1.0]])
Y = corr2d(X, K)
print(Y)
tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

Applied to the transposed image, whose edges are now horizontal, the same kernel detects nothing.

corr2d(X.t(), K)
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
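Transposing the kernel instead gives a horizontal edge detector, so it does fire on the transposed image (an extra check, not in the original notebook):

# K.t() has shape (2, 1) and detects horizontal edges, which is exactly
# what the transposed image contains.
print(corr2d(X.t(), K.t()))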

Convolution as matrix multiplication

# K = torch.tensor([[0, 1], [2, 3]])
K = torch.tensor([[1, 2], [3, 4]])
print(K)


def kernel2matrix(K):
    k, W = torch.zeros(5), torch.zeros((4, 9))
    k[:2], k[3:5] = K[0, :], K[1, :]
    W[0, :5], W[1, 1:6], W[2, 3:8], W[3, 4:] = k, k, k, k
    return W


W = kernel2matrix(K)
print(W)
tensor([[1, 2],
        [3, 4]])
tensor([[1., 2., 0., 3., 4., 0., 0., 0., 0.],
        [0., 1., 2., 0., 3., 4., 0., 0., 0.],
        [0., 0., 0., 1., 2., 0., 3., 4., 0.],
        [0., 0., 0., 0., 1., 2., 0., 3., 4.]])
X = torch.arange(9.0).reshape(3, 3)
Y = corr2d(X, K)
print(Y)
Y2 = torch.mv(W, X.reshape(-1)).reshape(2, 2)
assert np.allclose(Y, Y2)
tensor([[27., 37.],
        [57., 67.]])
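The same identity can also be reached through im2col (a sketch, not in the original notebook): F.unfold lays out every 2x2 patch of X as a column, so the whole cross-correlation collapses into a single matrix product with the flattened kernel.

# im2col view: each column of `patches` is one flattened 2x2 patch of X,
# so multiplying by the flattened kernel reproduces corr2d(X, K).
patches = F.unfold(X.reshape(1, 1, 3, 3), kernel_size=2)  # shape (1, 4, 4)
Y3 = (K.reshape(1, 4).float() @ patches.squeeze(0)).reshape(2, 2)
assert np.allclose(Y, Y3)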

Optimizing the kernel parameters

Let's learn a kernel to match the output of our manual edge detector.

# Construct a two-dimensional convolutional layer with 1 output channel and a
# kernel of shape (1, 2). For the sake of simplicity, we ignore the bias here
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# The two-dimensional convolutional layer uses four-dimensional input and
# output in the format of (example, channel, height, width), where the batch
# size (number of examples in the batch) and the number of channels are both 1

# Defining X and Y again.
X = torch.ones((6, 8))
X[:, 2:6] = 0
K = torch.tensor([[1.0, -1.0]])
Y = corr2d(X, K)

X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

for i in range(10):
    Y_hat = conv2d(X)
    l = (Y_hat - Y) ** 2
    conv2d.zero_grad()
    l.sum().backward()
    # Update the kernel
    conv2d.weight.data[:] -= 3e-2 * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f"batch {i + 1}, loss {l.sum():.3f}")

print(conv2d.weight.data.reshape((1, 2)))
batch 2, loss 12.626
batch 4, loss 2.939
batch 6, loss 0.829
batch 8, loss 0.277
batch 10, loss 0.103
tensor([[ 1.0161, -0.9523]])
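The manual weight update above is just plain SGD, so we can let torch.optim do the same work (a sketch; conv2d_sgd is our own name for a fresh layer, and lr=3e-2 matches the step size above):

# Same optimization via torch.optim.SGD (equivalent to the manual update).
conv2d_sgd = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)
optimizer = torch.optim.SGD(conv2d_sgd.parameters(), lr=3e-2)
for i in range(10):
    optimizer.zero_grad()
    loss = ((conv2d_sgd(X) - Y) ** 2).sum()
    loss.backward()
    optimizer.step()
print(conv2d_sgd.weight.data.reshape((1, 2)))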

Multiple input channels

# Re-define corr2d (same implementation as above).
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = torch.sum(X[i : i + h, j : j + w] * K)
    return Y
def corr2d_multi_in(X, K):
    # First, iterate through the 0th dimension (channel dimension) of `X` and
    # `K`. Then, add them together
    return sum(corr2d(x, k) for x, k in zip(X, K))


X = torch.tensor(
    [[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]
)
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

print(X.shape)  # 2 channels, each 3x3
print(K.shape)  # 2 sets of 2x2 filters
out = corr2d_multi_in(X, K)
print(out.shape)
print(out)
torch.Size([2, 3, 3])
torch.Size([2, 2, 2])
torch.Size([2, 2])
tensor([[ 56.,  72.],
        [104., 120.]])
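As a cross-check (not in the original notebook), F.conv2d gives the same 2x2 output once X gets a batch dimension and K gets an output-channel dimension:

# F.conv2d with one output channel: input (1, 2, 3, 3), weight (1, 2, 2, 2).
out2 = F.conv2d(X.unsqueeze(0), K.unsqueeze(0)).squeeze()
assert torch.allclose(out, out2)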

Multiple output channels

def corr2d_multi_in_out(X, K):
    # Iterate through the 0th dimension of `K`, and each time, perform
    # cross-correlation operations with input `X`. All of the results are
    # stacked together
    return torch.stack([corr2d_multi_in(X, k) for k in K], 0)


K = torch.stack((K, K + 1, K + 2), 0)
print(K.shape)
out = corr2d_multi_in_out(X, K)
print(out.shape)
torch.Size([3, 2, 2, 2])
torch.Size([3, 2, 2])
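The stacked kernel already has the (out_channels, in_channels, kH, kW) layout that nn.Conv2d uses for its weight, so copying it into a layer reproduces the result (a sketch, assuming bias=False and the default stride 1 / no padding; `layer` is our own name, not part of the original notebook):

# Copy the hand-built kernel into an nn.Conv2d layer and compare.
layer = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=2, bias=False)
with torch.no_grad():
    layer.weight.copy_(K)
assert torch.allclose(layer(X.unsqueeze(0)).squeeze(0), out)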

1x1 convolution

# A 1x1 conv is the same as multiplying each feature column at each pixel
# by a fully connected matrix
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = torch.matmul(K, X)  # Matrix multiplication in the fully-connected layer
    return Y.reshape((c_o, h, w))


X = torch.normal(0, 1, (3, 3, 3))  # 3 channels per pixel
K = torch.normal(0, 1, (2, 3, 1, 1))  # map from 3 channels to 2
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
print(Y2.shape)
assert float(torch.abs(Y1 - Y2).sum()) < 1e-6
torch.Size([2, 3, 3])
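Equivalently (a sketch, not in the original notebook), the 1x1 convolution is a per-pixel linear layer over the channel dimension: move channels last, apply an nn.Linear mapping 3 channels to 2 with the same weights, and move channels back.

# 1x1 convolution as a per-pixel nn.Linear over the channel dimension.
linear = nn.Linear(3, 2, bias=False)
with torch.no_grad():
    linear.weight.copy_(K.reshape(2, 3))
Y3 = linear(X.permute(1, 2, 0)).permute(2, 0, 1)
assert torch.allclose(Y1, Y3)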

Pooling

def pool2d(X, pool_size, mode="max"):
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == "max":
                Y[i, j] = X[i : i + p_h, j : j + p_w].max()
            elif mode == "avg":
                Y[i, j] = X[i : i + p_h, j : j + p_w].mean()
    return Y
# X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
X = torch.arange(16, dtype=torch.float32).reshape((4, 4))
print(X)
print(X.shape)
print(pool2d(X, (3, 3), "max"))
tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.]])
torch.Size([4, 4])
tensor([[10., 11.],
        [14., 15.]])
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
# Note: this reuses the name pool2d, shadowing the function defined above.
pool2d = nn.MaxPool2d(3, padding=0, stride=1)
print(pool2d(X))
tensor([[[[10., 11.],
          [14., 15.]]]])
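For comparison (an extra example, not in the original notebook), average pooling over the same 3x3 window with nn.AvgPool2d mirrors the "avg" branch of the manual pool2d function above:

# Average pooling with a 3x3 window and stride 1 (avg_pool is our own name).
avg_pool = nn.AvgPool2d(3, padding=0, stride=1)
out_avg = avg_pool(X)
# Each output entry equals the mean of the corresponding 3x3 window.
assert torch.allclose(out_avg[0, 0, 0, 0], X[0, 0, :3, :3].mean())
print(out_avg)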