GitHub Repository: deeplearningzerotoall/PyTorch
Path: blob/master/CNN/lab-10-X1-mnist_back_prop.ipynb
Kernel: Python 3
# Lab 10 MNIST and softmax
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)
# parameters
learning_rate = 0.5
batch_size = 10
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)
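Before defining the network, a quick look at one batch confirms the loader's shapes (a sanity check, not part of the original notebook):

# hypothetical sanity check on one batch
images, labels = next(iter(data_loader))
print(images.shape)   # torch.Size([10, 1, 28, 28])
print(labels.shape)   # torch.Size([10])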
# weights and biases for a 784-30-10 network
# note: .to(device) returns a plain (non-leaf) tensor, so on CUDA these are no
# longer nn.Parameters; that is fine here because the gradients are computed by
# hand below and autograd is never used
w1 = torch.nn.Parameter(torch.Tensor(784, 30)).to(device)
b1 = torch.nn.Parameter(torch.Tensor(30)).to(device)
w2 = torch.nn.Parameter(torch.Tensor(30, 10)).to(device)
b2 = torch.nn.Parameter(torch.Tensor(10)).to(device)
# initialize all weights and biases from a standard normal distribution
torch.nn.init.normal_(w1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(w2)
torch.nn.init.normal_(b2)
tensor([ 1.8153, 0.1568, 0.0348, 0.0334, 0.9967, 0.3957, 1.0805, 0.0302, -0.4433, -0.0206], device='cuda:0', grad_fn=<CopyBackwards>)
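Standard normal initialization is enough for this small network, but with sigmoid activations a fan-aware scheme such as Xavier initialization is a common alternative if the units saturate early. A sketch of that variant (not what the lab uses):

# alternative initialization (hypothetical): scale by layer fan-in/fan-out
torch.nn.init.xavier_normal_(w1)
torch.nn.init.xavier_normal_(w2)
torch.nn.init.zeros_(b1)
torch.nn.init.zeros_(b2)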
def sigma(x):
    # sigmoid function
    return 1.0 / (1.0 + torch.exp(-x))
    # equivalent elementwise form:
    # return torch.div(torch.tensor(1.0), torch.add(torch.tensor(1.0), torch.exp(-x)))
def sigma_prime(x):
    # derivative of the sigmoid function
    return sigma(x) * (1 - sigma(x))
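As a quick check (not part of the original notebook), sigma_prime can be compared against the gradient autograd computes for sigma:

# hypothetical check of sigma_prime against autograd
x = torch.randn(5, requires_grad=True)
sigma(x).sum().backward()                       # autograd: d/dx of sum(sigma(x))
print(torch.allclose(x.grad, sigma_prime(x)))   # expect: True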
X_test = mnist_test.data.view(-1, 28 * 28).float().to(device)[:1000]   # .test_data was renamed .data in newer torchvision
Y_test = mnist_test.targets.to(device)[:1000]                          # likewise .test_labels -> .targets
# note: X_test keeps the raw 0-255 pixel values, while the training batches
# from ToTensor() are scaled to [0, 1]

i = 0
while not i == 10000:
    for X, Y in data_loader:
        i += 1

        # forward
        X = X.view(-1, 28 * 28).to(device)
        Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device)    # one-hot
        l1 = torch.add(torch.matmul(X, w1), b1)
        a1 = sigma(l1)
        l2 = torch.add(torch.matmul(a1, w2), b2)
        y_pred = sigma(l2)

        diff = y_pred - Y

        # Back prop (chain rule)
        d_l2 = diff * sigma_prime(l2)
        d_b2 = d_l2
        d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)
        d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
        d_l1 = d_a1 * sigma_prime(l1)
        d_b1 = d_l1
        d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

        # gradient step: weight gradients are summed over the batch,
        # bias gradients are averaged
        w1 = w1 - learning_rate * d_w1
        b1 = b1 - learning_rate * torch.mean(d_b1, 0)
        w2 = w2 - learning_rate * d_w2
        b2 = b2 - learning_rate * torch.mean(d_b2, 0)

        # every 1000 steps, count correct predictions on the first 1000 test images
        if i % 1000 == 0:
            l1 = torch.add(torch.matmul(X_test, w1), b1)
            a1 = sigma(l1)
            l2 = torch.add(torch.matmul(a1, w2), b2)
            y_pred = sigma(l2)
            acct_mat = torch.argmax(y_pred, 1) == Y_test
            acct_res = acct_mat.sum()
            print(acct_res.item())

        if i == 10000:
            break
829
846
870
876
881
892
894
894
891
898
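Each printed count is the number of correct predictions on the 1,000 held-out test images, so the network ends near 89.8% accuracy. Since the loop sets d_l2 = (y_pred - Y) * sigma_prime(l2), it is implicitly minimizing the squared error 0.5 * sum((y_pred - Y)**2); the hand-derived gradients can therefore be verified against autograd on a random batch (a sketch, not part of the original notebook):

# hypothetical gradient check: manual chain rule vs. autograd
Xc = torch.randn(4, 784, device=device)
Yc = torch.eye(10, device=device)[torch.randint(0, 10, (4,), device=device)]  # random one-hot targets
w1c = torch.randn(784, 30, device=device, requires_grad=True)
b1c = torch.randn(30, device=device, requires_grad=True)
w2c = torch.randn(30, 10, device=device, requires_grad=True)
b2c = torch.randn(10, device=device, requires_grad=True)

l1c = Xc @ w1c + b1c
a1c = sigma(l1c)
l2c = a1c @ w2c + b2c
y_predc = sigma(l2c)

loss = 0.5 * ((y_predc - Yc) ** 2).sum()
loss.backward()

# same chain rule as the training loop
d_l2c = (y_predc - Yc) * sigma_prime(l2c)
d_w2c = a1c.t() @ d_l2c
print(torch.allclose(w2c.grad, d_w2c, atol=1e-4))   # expect: True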