import numpy as np


##########################
### MODEL
##########################

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The input is clipped to ``[-500, 500]`` before exponentiation so that
    ``np.exp`` cannot overflow for large negative ``z``. Inside that range
    the result is unchanged. Above it the result is exactly 1.0 either way;
    below it the result saturates at about 7e-218 instead of decaying
    further towards 0.

    Parameters
    ----------
    z : scalar or np.ndarray
        Net input(s).

    Returns
    -------
    Same shape as ``z``, with values in [0, 1].
    """
    # Float bounds (not ints) so clipping also works for unsigned dtypes.
    return 1. / (1. + np.exp(-np.clip(z, -500.0, 500.0)))


def int_to_onehot(y, num_labels):
    """Encode integer class labels as rows of a one-hot matrix.

    Parameters
    ----------
    y : array-like of int, first dimension n_examples
        Class label(s) of each example; every label must be a valid column
        index into a row of length ``num_labels``.
    num_labels : int
        Number of columns of the result.

    Returns
    -------
    np.ndarray of float, shape [n_examples, num_labels]
        Zeros everywhere except a 1 at ``[i, y[i]]``.
    """
    labels = np.asarray(y)
    n_examples = labels.shape[0]
    onehot = np.zeros((n_examples, num_labels))

    # An empty label array has nothing to set (and may have a float dtype,
    # which is not usable as an index).
    if labels.size:
        # Row indices shaped to broadcast against ``labels`` so that row i is
        # paired with labels[i] whether labels is (n,), (n, 1) or (n, k).
        rows = np.arange(n_examples).reshape(
            (-1,) + (1,) * (labels.ndim - 1))
        onehot[rows, labels] = 1

    return onehot


class NeuralNetMLP:
    """Multilayer perceptron with one sigmoid hidden layer and a sigmoid
    output layer, with hand-written gradients of a squared-error loss.

    Attributes
    ----------
    num_classes : int
        Number of output units.
    weight_h : np.ndarray, shape [n_hidden, n_features]
    bias_h : np.ndarray, shape [n_hidden]
    weight_out : np.ndarray, shape [n_classes, n_hidden]
    bias_out : np.ndarray, shape [n_classes]
    """

    def __init__(self, num_features, num_hidden, num_classes, random_seed=123):
        """Initialize weights from N(0, 0.1^2) and biases with zeros.

        Parameters
        ----------
        num_features : int
            Size of each input example.
        num_hidden : int
            Number of hidden units.
        num_classes : int
            Number of output units.
        random_seed : int, default 123
            Seed of the ``np.random.RandomState`` used to draw the weights.
        """
        super().__init__()

        self.num_classes = num_classes

        # hidden
        rng = np.random.RandomState(random_seed)

        self.weight_h = rng.normal(
            loc=0.0, scale=0.1, size=(num_hidden, num_features))
        self.bias_h = np.zeros(num_hidden)

        # output
        self.weight_out = rng.normal(
            loc=0.0, scale=0.1, size=(num_classes, num_hidden))
        self.bias_out = np.zeros(num_classes)

    def forward(self, x):
        """Run the forward pass.

        Parameters
        ----------
        x : np.ndarray, shape [n_examples, n_features]

        Returns
        -------
        a_h : np.ndarray, shape [n_examples, n_hidden]
            Hidden-layer activations.
        a_out : np.ndarray, shape [n_examples, n_classes]
            Output-layer activations.
        """
        # Hidden layer
        # input dim: [n_examples, n_features] dot [n_hidden, n_features].T
        # output dim: [n_examples, n_hidden]
        z_h = np.dot(x, self.weight_h.T) + self.bias_h
        a_h = sigmoid(z_h)

        # Output layer
        # input dim: [n_examples, n_hidden] dot [n_classes, n_hidden].T
        # output dim: [n_examples, n_classes]
        z_out = np.dot(a_h, self.weight_out.T) + self.bias_out
        a_out = sigmoid(z_out)
        return a_h, a_out

    def backward(self, x, a_h, a_out, y):
        """Compute the loss gradients for all weights and biases.

        The loss being differentiated is the squared error between ``a_out``
        and the one-hot encoding of ``y``, summed over classes and divided
        by the number of examples.

        Parameters
        ----------
        x : np.ndarray, shape [n_examples, n_features]
            Inputs that produced ``a_h`` and ``a_out``.
        a_h : np.ndarray, shape [n_examples, n_hidden]
            Hidden activations returned by ``forward``.
        a_out : np.ndarray, shape [n_examples, n_classes]
            Output activations returned by ``forward``.
        y : np.ndarray of int, shape [n_examples]
            Integer class labels.

        Returns
        -------
        d_loss__dw_out : np.ndarray, shape [n_classes, n_hidden]
        d_loss__db_out : np.ndarray, shape [n_classes]
        d_loss__d_w_h : np.ndarray, shape [n_hidden, n_features]
        d_loss__d_b_h : np.ndarray, shape [n_hidden]
        """

        #########################
        ### Output layer weights
        #########################

        # onehot encoding
        y_onehot = int_to_onehot(y, self.num_classes)

        # Part 1: dLoss/dOutWeights
        ## = dLoss/dOutAct * dOutAct/dOutNet * dOutNet/dOutWeight
        ## where DeltaOut = dLoss/dOutAct * dOutAct/dOutNet
        ## for convenient re-use

        # input/output dim: [n_examples, n_classes]
        d_loss__d_a_out = 2.*(a_out - y_onehot) / y.shape[0]

        # input/output dim: [n_examples, n_classes]
        d_a_out__d_z_out = a_out * (1. - a_out)  # sigmoid derivative

        # output dim: [n_examples, n_classes]
        delta_out = d_loss__d_a_out * d_a_out__d_z_out

        # gradient for output weights

        # [n_examples, n_hidden]
        d_z_out__dw_out = a_h

        # input dim: [n_classes, n_examples] dot [n_examples, n_hidden]
        # output dim: [n_classes, n_hidden]
        d_loss__dw_out = np.dot(delta_out.T, d_z_out__dw_out)
        d_loss__db_out = np.sum(delta_out, axis=0)

        #################################
        # Part 2: dLoss/dHiddenWeights
        ## = DeltaOut * dOutNet/dHiddenAct * dHiddenAct/dHiddenNet * dHiddenNet/dWeight

        # [n_classes, n_hidden]
        d_z_out__a_h = self.weight_out

        # output dim: [n_examples, n_hidden]
        d_loss__a_h = np.dot(delta_out, d_z_out__a_h)

        # [n_examples, n_hidden]
        d_a_h__d_z_h = a_h * (1. - a_h)  # sigmoid derivative

        # [n_examples, n_features]
        d_z_h__d_w_h = x

        # Hidden-layer delta, computed once and shared by the weight and bias
        # gradients. dim: [n_examples, n_hidden]
        delta_h = d_loss__a_h * d_a_h__d_z_h

        # output dim: [n_hidden, n_features]
        d_loss__d_w_h = np.dot(delta_h.T, d_z_h__d_w_h)
        d_loss__d_b_h = np.sum(delta_h, axis=0)

        return d_loss__dw_out, d_loss__db_out, d_loss__d_w_h, d_loss__d_b_h